framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,2,0.2211
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,4,0.2103
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,128,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,64,0.2065
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,32,0.2066
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,16,0.2201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,8,0.2116
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,512,0.2216
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,256,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,1024,0.2112
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,2048,0.2096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,4096,0.2256
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,8192,0.2296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,16384,0.2290
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,32768,0.2425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,65536,0.2611
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,2,0.2154
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,131072,0.2895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,4,0.2166
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,8,0.2156
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,16,0.2155
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,32,0.2196
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,128,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,64,0.2188
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,256,0.2286
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,512,0.2289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,1024,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,4096,0.2375
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,2048,0.2239
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,8192,0.2385
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,16384,0.2397
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,32768,0.2490
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,65536,0.2688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,131072,0.2844
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,2,0.2300
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,4,0.2209
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,8,0.2207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,16,0.2227
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,32,0.2317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,64,0.2247
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,128,0.2296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,256,0.2237
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,512,0.2300
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,1024,0.2251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,2048,0.2260
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,4096,0.2508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,32768,0.2512
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,8192,0.2506
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,16384,0.2425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,65536,0.2753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,131072,0.3071
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,2,0.2330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,4,0.2332
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,8,0.2354
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,16,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,32,0.2369
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,64,0.2347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,256,0.2387
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,128,0.2329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,512,0.2402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,1024,0.2336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,2048,0.2354
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,4096,0.2614
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,8192,0.2640
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,16384,0.2547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,32768,0.2630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,65536,0.2921
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,131072,0.3302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,2,0.2461
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,4,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,8,0.2532
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,32,0.2504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,16,0.2458
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,64,0.2481
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,128,0.2517
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,512,0.2456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,256,0.2460
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,1024,0.2524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,2048,0.2539
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,4096,0.2789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,8192,0.2804
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,16384,0.2737
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,32768,0.2819
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,65536,0.3104
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,131072,0.3664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,2,0.2629
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,4,0.2692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,8,0.2657
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,16,0.2670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,32,0.2637
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,64,0.2623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,256,0.2674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,128,0.2612
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,512,0.2640
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,1024,0.2711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,2048,0.2814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,4096,0.2907
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,8192,0.3032
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,16384,0.2960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,32768,0.3153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,65536,0.3506
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,131072,0.4201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,2,0.2946
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,4,0.2950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,8,0.3004
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,32,0.2960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,16,0.2948
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,64,0.2934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,128,0.2934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,256,0.2949
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,1024,0.3029
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,512,0.2970
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,2048,0.3095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,8192,0.3384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,4096,0.3225
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,16384,0.3372
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,32768,0.3642
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,65536,0.4242
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,131072,0.5374
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,2,0.3722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,4,0.3681
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,8,0.3679
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,16,0.3714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,32,0.3681
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,64,0.3680
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,128,0.3690
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,256,0.3715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,512,0.3733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,1024,0.3784
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,2048,0.3896
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,4096,0.4147
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,8192,0.4339
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,16384,0.4438
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,32768,0.4892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,65536,0.5927
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,131072,0.7830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,4,0.5107
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,2,0.5097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,16,0.5122
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,32,0.5134
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,8,0.5094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,64,0.5135
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,128,0.5190
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,512,0.5275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,256,0.5223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,1024,0.5432
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,4096,0.5993
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,2048,0.5661
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,8192,0.6285
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,16384,0.6852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,32768,0.8062
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,65536,1.0317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,131072,1.5882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,8,0.8108
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,4,0.8132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,16,0.8122
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,2,0.8130
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,32,0.8102
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,64,0.8176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,512,0.8348
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,2048,0.9102
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,256,0.8249
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,4096,0.9713
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,128,0.8168
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,16384,1.1423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,1024,0.8614
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,8192,1.0275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,32768,1.3700
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,65536,1.8756
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,4,1.4374
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,8,1.4397
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,16,1.4389
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,32,1.4440
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,2,1.4411
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,64,1.4526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,128,1.4553
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,512,1.4913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,2048,1.6359
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,256,1.4706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,1024,1.5429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,4096,1.7410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,2,0.1798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,4,0.1850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,8192,1.8514
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,16384,2.0684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,16,0.1928
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,32,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,64,0.1805
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,128,0.1838
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,8,0.1804
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,32768,2.4956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,256,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,512,0.1824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,1024,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,2048,0.1824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,4096,0.2031
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,8192,0.2190
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,16384,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,32768,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,65536,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,131072,0.2435
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,4,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,2,0.1958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,8,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,32,0.1943
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,16,0.1982
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,64,0.2023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,128,0.1907
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,256,0.2017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,1024,0.1940
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,512,0.1946
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,2048,0.2054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,4096,0.2192
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,8192,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,16384,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,32768,0.2400
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,65536,0.2422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,131072,0.2582
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,2,0.1989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,4,0.2057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,8,0.1993
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,16,0.2011
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,32,0.2076
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,64,0.1979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,128,0.2002
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,256,0.2014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,512,0.2075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,1024,0.2066
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,2048,0.2051
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,4096,0.2266
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,8192,0.2295
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,16384,0.2162
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,32768,0.2211
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,65536,0.2427
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,131072,0.2812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,2,0.2067
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,4,0.2153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,8,0.2131
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,16,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,64,0.2057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,32,0.2140
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,128,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,256,0.2070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,512,0.2163
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,1024,0.2099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,2048,0.2173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,4096,0.2279
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,8192,0.2321
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,16384,0.2334
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,32768,0.2346
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,131072,0.3016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,65536,0.2670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,2,0.2197
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,8,0.2238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,16,0.2160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,32,0.2219
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,64,0.2248
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,128,0.2182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,256,0.2211
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,4,0.2187
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,512,0.2197
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,1024,0.2186
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,2048,0.2329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,4096,0.2478
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,8192,0.2530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,16384,0.2436
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,32768,0.2571
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,65536,0.2842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,131072,0.3305
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,2,0.2360
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,4,0.2320
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,8,0.2344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,16,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,32,0.2300
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,128,0.2327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,64,0.2348
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,256,0.2320
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,512,0.2383
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,1024,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,2048,0.2406
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,4096,0.2598
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,8192,0.2678
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,32768,0.2761
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,65536,0.3223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,16384,0.2633
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,131072,0.3930
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,2,0.2521
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,4,0.2529
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,8,0.2514
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,16,0.2511
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,32,0.2529
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,128,0.2512
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,64,0.2541
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,256,0.2560
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,512,0.2545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,1024,0.2586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,2048,0.2649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,4096,0.2859
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,8192,0.2918
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,16384,0.2983
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,32768,0.3288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,65536,0.3847
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,2,0.3195
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,131072,0.4899
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,4,0.3142
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,8,0.3140
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,16,0.3130
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,32,0.3142
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,64,0.3147
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,128,0.3150
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,256,0.3121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,2048,0.3394
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,512,0.3166
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,1024,0.3242
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,4096,0.3574
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,8192,0.3760
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,16384,0.3917
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,2,0.4273
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,65536,0.5324
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,32768,0.4369
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,4,0.4264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,8,0.4262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,16,0.4260
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,131072,0.7264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,32,0.4266
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,64,0.4286
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,128,0.4275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,256,0.4364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,512,0.4363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,1024,0.4506
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,2048,0.4769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,4096,0.5091
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,8192,0.5407
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,16384,0.5951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,32768,0.7135
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,2,0.6520
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,4,0.6516
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,65536,0.9476
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,8,0.6528
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,32,0.7123
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,16,0.6534
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,131072,1.4980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,64,0.6626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,128,0.6689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,256,0.6737
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,512,0.6877
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,2048,0.7618
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,1024,0.7130
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,16384,0.9882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,4096,0.8219
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,8192,0.8771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,2,1.1562
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,8,1.1521
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,16,1.1592
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,32768,1.2190
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,4,1.2895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,65536,1.7119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,32,1.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,64,1.1645
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,128,1.1730
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,256,1.1836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,2048,1.3555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,512,1.2085
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,4096,1.4581
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,1024,1.2554
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,2,0.1637
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,16384,1.7789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,4,0.1663
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,8,0.1625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,8192,1.5660
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,16,0.1658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,32,0.1656
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,64,0.1685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,128,0.1751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,256,0.1658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,512,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,1024,0.1860
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,4096,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,8192,0.1870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,16384,0.1778
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,32768,0.1951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,2048,0.1668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,32768,2.2136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,65536,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,131072,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,2,0.1772
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,4,0.1762
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,8,0.1742
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,16,0.1846
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,32,0.1723
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,128,0.1739
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,64,0.1861
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,256,0.1815
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,512,0.1757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,1024,0.1753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,2048,0.1844
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,4096,0.2045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,8192,0.1968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,16384,0.2011
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,32768,0.1958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,65536,0.2183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,2,0.1826
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,131072,0.2444
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,4,0.2044
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,8,0.1903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,16,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,32,0.1875
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,64,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,128,0.1814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,256,0.1787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,512,0.1835
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,1024,0.2045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,2048,0.1930
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,4096,0.2079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,8192,0.2052
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,16384,0.2014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,32768,0.2064
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,65536,0.2230
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,131072,0.2565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,2,0.1986
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,4,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,8,0.1950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,16,0.1941
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,32,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,128,0.1911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,64,0.1890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,256,0.1976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,512,0.1890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,1024,0.1985
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,2048,0.1965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,4096,0.2111
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,8192,0.2239
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,16384,0.2100
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,32768,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,65536,0.2520
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,4,0.2047
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,2,0.1988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,8,0.1991
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,16,0.1989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,32,0.2023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,131072,0.2872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,64,0.1998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,128,0.2059
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,256,0.2041
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,512,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,1024,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,2048,0.2109
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,4096,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,8192,0.2312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,16384,0.2334
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,32768,0.2384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,65536,0.2711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,131072,0.3129
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,2,0.2112
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,4,0.2133
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,16,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,8,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,32,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,64,0.2137
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,128,0.2091
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,256,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,512,0.2112
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,1024,0.2184
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,2048,0.2209
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,4096,0.2365
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,8192,0.2531
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,16384,0.2443
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,32768,0.2581
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,2,0.2279
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,131072,0.3691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,65536,0.2975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,4,0.2356
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,8,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,16,0.2301
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,32,0.2327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,64,0.2267
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,128,0.2275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,256,0.2265
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,512,0.2288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,1024,0.2425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,4096,0.2561
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,2048,0.2393
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,16384,0.2719
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,8192,0.2698
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,32768,0.2981
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,131072,0.4649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,2,0.2796
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,8,0.2777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,4,0.2770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,65536,0.3571
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,16,0.2783
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,32,0.2771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,64,0.2784
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,128,0.2797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,256,0.2934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,2048,0.3010
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,512,0.2809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,4096,0.3243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,1024,0.2875
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,8192,0.3415
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,16384,0.3548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,32768,0.4001
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,65536,0.5000
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,2,0.3705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,131072,0.6925
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,4,0.3695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,8,0.3713
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,16,0.3707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,32,0.4041
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,64,0.3722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,128,0.4017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,256,0.3738
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,512,0.3814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,1024,0.3944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,4096,0.4506
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,2048,0.4214
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,8192,0.4821
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,16384,0.5371
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,32768,0.6556
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,65536,0.8865
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,4,0.5414
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,8,0.5421
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,32,0.5416
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,2,0.5408
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,131072,1.4419
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,64,0.5425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,16,0.5394
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,128,0.5453
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,256,0.5524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,512,0.5654
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,2048,0.6390
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,4096,0.6992
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,16384,0.8676
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,1024,0.5924
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,8192,0.7537
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,2,0.9116
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,32768,1.0979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,4,0.9140
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,65536,1.6016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,8,0.9174
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,16,0.9215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,32,0.9343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,64,0.9371
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,128,0.9413
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,256,0.9522
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,512,0.9744
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,2048,1.1213
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,1024,1.0222
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,16384,1.5451
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,4096,1.2293
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,2,0.1558
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,4,0.1535
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,8,0.1447
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,8192,1.3361
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,64,0.1507
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,128,0.1462
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,256,0.1559
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,512,0.1442
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,32768,1.9703
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,32,0.1519
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,2048,0.1492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,1024,0.1474
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,16,0.1487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,4096,0.1654
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,8192,0.1685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,16384,0.1649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,32768,0.1755
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,65536,0.1909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,2,0.1468
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,131072,0.2209
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,4,0.1453
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,8,0.1469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,16,0.1483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,64,0.1532
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,128,0.1463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,256,0.1392
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,32,0.1543
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,512,0.1446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,1024,0.1433
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,2048,0.1763
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,4096,0.1755
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,8192,0.1748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,16384,0.1672
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,32768,0.1801
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,65536,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,2,0.1513
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,131072,0.2444
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,4,0.1625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,8,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,32,0.1506
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,64,0.1590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,16,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,128,0.1571
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,256,0.1626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,512,0.1645
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,1024,0.1589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,2048,0.1698
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,4096,0.1792
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,8192,0.1885
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,16384,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,32768,0.1847
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,65536,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,131072,0.2440
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,2,0.1710
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,4,0.1666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,8,0.1670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,16,0.1727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,32,0.1681
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,64,0.1766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,128,0.1714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,256,0.1761
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,512,0.1696
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,1024,0.1716
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,2048,0.1841
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,4096,0.1923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,16384,0.1948
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,8192,0.2046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,32768,0.1997
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,65536,0.2318
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,131072,0.2618
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,2,0.1784
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,4,0.1829
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,8,0.1824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,16,0.1820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,32,0.1782
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,64,0.1786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,256,0.1757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,128,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,512,0.1808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,1024,0.1871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,2048,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,4096,0.2025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,16384,0.2122
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,8192,0.2079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,32768,0.2265
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,65536,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,131072,0.2951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,2,0.1904
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,4,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,8,0.1860
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,16,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,32,0.1939
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,64,0.1911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,128,0.1876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,512,0.1927
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,256,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,1024,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,2048,0.2022
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,4096,0.2194
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,8192,0.2337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,16384,0.2307
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,32768,0.2425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,131072,0.3536
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,65536,0.2842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,2,0.2014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,4,0.2001
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,8,0.2020
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,16,0.2029
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,32,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,64,0.2043
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,256,0.2024
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,128,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,512,0.2087
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,2048,0.2316
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,1024,0.2177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,4096,0.2520
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,8192,0.2630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,16384,0.2688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,32768,0.2888
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,65536,0.3491
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,2,0.2101
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,131072,0.4579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,8,0.2144
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,16,0.2145
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,4,0.2112
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,32,0.2135
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,64,0.2113
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,128,0.2141
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,512,0.2273
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,1024,0.2470
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,2048,0.2787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,256,0.2193
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,4096,0.3003
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,8192,0.3139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,16384,0.3276
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,32768,0.3715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,65536,0.4701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,2,0.2470
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,131072,0.6658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,4,0.2464
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,8,0.2420
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,16,0.2422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,32,0.2424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,64,0.2429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,128,0.2440
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,256,0.2606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,512,0.2769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,1024,0.3111
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,2048,0.3770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,4096,0.4027
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,8192,0.4332
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,16384,0.4872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,32768,0.6066
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,65536,0.8402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,2,0.3161
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,4,0.3144
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,131072,1.3907
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,16,0.3147
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,8,0.3148
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,32,0.3152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,64,0.3142
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,128,0.3189
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,256,0.3382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,512,0.3693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,2048,0.5610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,4096,0.6152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,1024,0.4340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,8192,0.6691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,16384,0.7837
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,65536,1.5131
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,32768,1.0110
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,2,0.4592
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,4,0.4599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,16,0.4584
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,8,0.4603
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,32,0.4574
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,64,0.4576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,128,0.4649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,256,0.5003
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,512,0.5590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,1024,0.6853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,2048,0.9302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,2,0.1409
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,16384,1.3347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,4096,1.0089
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,4,0.1360
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,8,0.1276
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,16,0.1256
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,8192,1.1190
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,32,0.1423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,64,0.1361
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,128,0.1442
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,256,0.1378
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,512,0.1299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,1024,0.1280
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,2048,0.1460
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,4096,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,8192,0.1598
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,16384,0.1497
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,32768,0.1563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,32768,1.7628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,65536,0.1688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,131072,0.2012
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,2,0.1421
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,4,0.1422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,8,0.1443
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,16,0.1375
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,32,0.1351
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,64,0.1432
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,128,0.1441
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,256,0.1359
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,1024,0.1358
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,512,0.1442
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,2048,0.1425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,4096,0.1501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,8192,0.1642
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,16384,0.1617
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,32768,0.1584
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,65536,0.1821
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,131072,0.2248
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,2,0.1422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,4,0.1462
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,8,0.1503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,16,0.1550
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,32,0.1402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,64,0.1473
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,128,0.1402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,256,0.1504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,512,0.1481
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,1024,0.1539
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,2048,0.1487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,4096,0.1767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,8192,0.1662
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,16384,0.1644
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,65536,0.2069
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,131072,0.2343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,2,0.1609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,4,0.1624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,8,0.1505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,16,0.1531
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,32768,0.1888
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,32,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,64,0.1542
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,128,0.1637
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,256,0.1519
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,512,0.1526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,1024,0.1624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,2048,0.1668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,4096,0.1865
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,8192,0.1932
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,16384,0.1863
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,32768,0.1925
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,65536,0.2172
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,131072,0.2535
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,2,0.1617
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,4,0.1726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,8,0.1695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,16,0.1712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,32,0.1656
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,64,0.1650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,128,0.1699
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,256,0.1745
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,512,0.1727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,2048,0.1829
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,1024,0.1701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,4096,0.2011
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,8192,0.2054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,32768,0.2124
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,65536,0.2499
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,131072,0.2978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,2,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,4,0.1706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,16384,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,8,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,16,0.1720
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,32,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,64,0.1716
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,128,0.1726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,256,0.1766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,512,0.1808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,1024,0.1949
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,2048,0.1988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,4096,0.2234
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,8192,0.2263
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,16384,0.2222
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,32768,0.2390
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,65536,0.2771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,131072,0.3472
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,2,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,4,0.1918
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,16,0.1882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,8,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,32,0.1886
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,64,0.1836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,128,0.1894
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,256,0.1944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,512,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,1024,0.2113
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,2048,0.2259
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,4096,0.2434
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,8192,0.2525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,16384,0.2569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,32768,0.2815
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,65536,0.3462
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,2,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,131072,0.4510
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,4,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,8,0.1965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,16,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,64,0.2001
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,32,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,256,0.2132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,512,0.2178
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,1024,0.2364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,2048,0.2689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,4096,0.2897
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,128,0.2059
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,8192,0.3055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,16384,0.3215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,32768,0.3630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,65536,0.4646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,2,0.2313
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,4,0.2313
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,131072,0.6576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,8,0.2316
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,16,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,32,0.2343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,64,0.2317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,128,0.2332
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,256,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,512,0.2634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,2048,0.3639
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,1024,0.3020
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,4096,0.3929
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,8192,0.4240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,16384,0.4757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,32768,0.5915
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,2,0.2904
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,4,0.2920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,65536,0.8219
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,8,0.2912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,16,0.2903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,32,0.2917
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,131072,1.3814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,64,0.2913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,128,0.2960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,256,0.3146
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,512,0.3441
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,2048,0.5329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,1024,0.4099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,16384,0.7543
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,4096,0.5884
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,8192,0.6450
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,32768,0.9871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,2,0.4097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,4,0.4097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,65536,1.4864
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,8,0.4079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,16,0.4085
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,32,0.4088
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,64,0.4114
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,128,0.4191
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,256,0.4508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,2048,0.8714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,512,0.5067
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,2,0.1256
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,4096,0.9553
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,1024,0.6250
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,4,0.1263
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,8192,1.0605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,32768,1.7013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,8,0.1238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,16,0.1276
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,32,0.1362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,64,0.1183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,16384,1.2727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,128,0.1193
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,256,0.1255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,512,0.1279
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,1024,0.1252
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,2048,0.1384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,4096,0.1545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,8192,0.1381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,16384,0.1343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,32768,0.1501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,65536,0.1685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,4,0.1299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,8,0.1422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,131072,0.1947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,16,0.1390
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,32,0.1298
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,2,0.1305
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,64,0.1302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,128,0.1297
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,256,0.1417
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,1024,0.1339
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,512,0.1419
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,2048,0.1442
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,4096,0.1440
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,8192,0.1500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,16384,0.1464
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,32768,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,65536,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,131072,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,2,0.1377
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,4,0.1482
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,8,0.1433
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,16,0.1381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,32,0.1363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,64,0.1503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,128,0.1401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,256,0.1395
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,512,0.1466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,1024,0.1423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,2048,0.1461
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,4096,0.1668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,8192,0.1731
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,16384,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,32768,0.1722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,65536,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,131072,0.2279
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,2,0.1443
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,8,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,4,0.1460
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,16,0.1499
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,32,0.1545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,64,0.1504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,128,0.1525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,512,0.1464
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,1024,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,256,0.1465
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,4096,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,2048,0.1600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,8192,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,16384,0.1853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,32768,0.1932
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,65536,0.2121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,131072,0.2610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,2,0.1572
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,4,0.1655
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,16,0.1607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,32,0.1577
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,128,0.1624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,8,0.1569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,256,0.1694
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,64,0.1576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,1024,0.1725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,4096,0.1948
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,2048,0.1856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,8192,0.2068
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,16384,0.1989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,512,0.1623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,32768,0.2195
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,65536,0.2443
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,131072,0.2918
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,2,0.1645
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,4,0.1706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,8,0.1692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,16,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,32,0.1707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,64,0.1657
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,256,0.1668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,1024,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,512,0.1711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,128,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,2048,0.1971
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,4096,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,8192,0.2247
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,16384,0.2217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,32768,0.2330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,65536,0.2782
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,131072,0.3501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,2,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,4,0.1869
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,8,0.1848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,16,0.1866
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,32,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,64,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,128,0.1882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,256,0.1895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,512,0.2014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,1024,0.2067
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,2048,0.2243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,4096,0.2422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,16384,0.2579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,8192,0.2503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,32768,0.2794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,65536,0.3431
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,131072,0.4472
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,2,0.1895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,4,0.1920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,8,0.1923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,32,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,64,0.1940
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,16,0.1952
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,128,0.1997
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,256,0.2068
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,512,0.2146
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,2048,0.2647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,1024,0.2305
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,4096,0.2842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,8192,0.3001
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,16384,0.3176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,32768,0.3636
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,65536,0.4610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,4,0.2242
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,2,0.2271
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,131072,0.6538
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,32,0.2272
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,8,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,64,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,128,0.2259
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,256,0.2388
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,16,0.2277
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,512,0.2575
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,1024,0.2900
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,2048,0.3577
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,4096,0.3798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,16384,0.4682
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,8192,0.4097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,32768,0.5844
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,65536,0.8132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,2,0.2815
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,4,0.2800
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,131072,1.3757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,16,0.2796
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,32,0.2798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,64,0.2800
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,8,0.2783
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,128,0.2860
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,256,0.3049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,512,0.3359
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,2048,0.5211
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,4096,0.5762
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,1024,0.3945
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,8192,0.6333
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,16384,0.7451
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,32768,0.9785
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,65536,1.4876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,2,0.3875
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,4,0.3885
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,8,0.4100
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,16,0.3876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,32,0.3878
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,64,0.3885
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,128,0.3963
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,256,0.4290
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,512,0.4830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,2048,0.8410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,1024,0.6012
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,16384,1.2443
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,2,0.1357
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,4,0.1194
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,4096,0.9270
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,8192,1.0369
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,8,0.1327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,16,0.1153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,32,0.1190
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,64,0.1317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,32768,1.6811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,128,0.1353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,256,0.1154
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,1024,0.1196
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,2048,0.1251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,4096,0.1299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,8192,0.1525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,512,0.1271
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,16384,0.1541
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,32768,0.1362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,65536,0.1605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,2,0.1257
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,131072,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,4,0.1334
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,8,0.1245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,16,0.1271
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,32,0.1298
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,64,0.1337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,128,0.1364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,256,0.1331
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,512,0.1275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,1024,0.1257
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,2048,0.1279
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,8192,0.1459
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,16384,0.1496
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,32768,0.1594
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,4096,0.1470
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,2,0.1363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,4,0.1400
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,8,0.1319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,131072,0.2050
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,65536,0.1785
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,16,0.1401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,64,0.1360
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,32,0.1337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,256,0.1426
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,512,0.1363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,1024,0.1443
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,128,0.1399
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,2048,0.1421
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,4096,0.1523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,8192,0.1700
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,16384,0.1607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,32768,0.1618
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,65536,0.1932
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,131072,0.2262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,2,0.1446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,4,0.1439
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,8,0.1543
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,16,0.1463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,32,0.1530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,64,0.1440
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,128,0.1538
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,256,0.1444
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,512,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,1024,0.1480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,2048,0.1571
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,4096,0.1752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,8192,0.1845
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,16384,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,32768,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,65536,0.2119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,131072,0.2576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,2,0.1528
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,4,0.1593
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,8,0.1661
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,32,0.1546
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,64,0.1605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,128,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,256,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,512,0.1563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,1024,0.1666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,2048,0.1855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,16,0.1548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,4096,0.1906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,8192,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,32768,0.2121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,16384,0.2017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,65536,0.2347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,131072,0.2853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,2,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,4,0.1586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,16,0.1682
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,8,0.1616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,32,0.1641
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,64,0.1644
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,128,0.1626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,256,0.1712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,512,0.1658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,1024,0.1802
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,2048,0.1989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,8192,0.2171
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,16384,0.2165
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,32768,0.2329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,65536,0.2801
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,4096,0.2111
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,131072,0.3397
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,2,0.1752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,4,0.1733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,8,0.1778
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,16,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,32,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,64,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,128,0.1809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,256,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,512,0.1952
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,1024,0.2026
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,2048,0.2163
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,4096,0.2406
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,8192,0.2511
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,16384,0.2552
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,32768,0.2808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,65536,0.3344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,131072,0.4461
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,4,0.1903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,16,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,32,0.1899
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,64,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,128,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,2,0.1904
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,256,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,8,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,512,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,1024,0.2309
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,2048,0.2610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,4096,0.2840
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,8192,0.2984
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,16384,0.3145
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,32768,0.3591
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,65536,0.4610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,2,0.2213
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,4,0.2193
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,8,0.2211
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,131072,0.6531
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,16,0.2223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,64,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,32,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,128,0.2222
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,256,0.2385
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,512,0.2525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,2048,0.3534
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,1024,0.2883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,4096,0.3808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,4,0.2756
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,2,0.2744
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,8,0.2756
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,8192,0.4073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,16384,0.4664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,32768,0.5892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,65536,0.8126
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,16,0.2749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,32,0.2749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,64,0.2747
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,131072,1.3690
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,128,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,256,0.2995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,512,0.3293
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,1024,0.3884
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,16384,0.7399
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,4096,0.5684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,32768,0.9670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,2048,0.5165
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,8192,0.6242
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,2,0.3778
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,65536,1.4715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,4,0.3776
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,8,0.3775
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,16,0.3772
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,64,0.3763
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,32,0.3779
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,128,0.3841
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,256,0.4188
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,512,0.4701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,2,0.1225
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,2048,0.8320
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,4,0.1173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,16384,1.2419
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,4096,0.9153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,8,0.1236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,1024,0.5902
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,8192,1.0245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,16,0.1254
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,32,0.1136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,64,0.1215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,128,0.1215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,256,0.1215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,512,0.1275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,1024,0.1232
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,2048,0.1377
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,16384,0.1293
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,4096,0.1399
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,32768,0.1414
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,65536,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,131072,0.1883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,8192,0.1401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,32768,1.6666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,2,0.1237
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,4,0.1320
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,8,0.1266
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,16,0.1345
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,32,0.1338
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,64,0.1239
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,128,0.1221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,256,0.1236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,512,0.1276
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,1024,0.1265
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,4096,0.1467
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,2048,0.1362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,8192,0.1561
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,16384,0.1437
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,32768,0.1456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,65536,0.1691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,131072,0.2092
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,2,0.1317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,4,0.1420
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,8,0.1378
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,16,0.1324
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,32,0.1322
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,64,0.1336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,128,0.1320
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,256,0.1381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,512,0.1428
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,1024,0.1366
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,2048,0.1426
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,4096,0.1506
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,8192,0.1611
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,16384,0.1534
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,32768,0.1600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,65536,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,131072,0.2288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,2,0.1400
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,4,0.1530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,8,0.1439
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,16,0.1485
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,32,0.1405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,64,0.1408
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,128,0.1484
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,256,0.1441
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,1024,0.1543
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,2048,0.1504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,4096,0.1605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,8192,0.1743
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,16384,0.1777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,512,0.1426
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,32768,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,65536,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,131072,0.2546
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,2,0.1527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,4,0.1517
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,8,0.1589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,16,0.1566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,32,0.1576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,64,0.1526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,128,0.1507
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,512,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,1024,0.1671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,2048,0.1773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,256,0.1518
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,4096,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,8192,0.1932
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,16384,0.1945
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,32768,0.2057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,65536,0.2398
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,131072,0.2873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,2,0.1659
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,4,0.1601
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,8,0.1673
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,16,0.1586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,32,0.1563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,64,0.1594
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,128,0.1639
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,256,0.1641
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,512,0.1678
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,1024,0.1766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,4096,0.2049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,8192,0.2218
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,2048,0.1920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,16384,0.2190
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,32768,0.2295
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,65536,0.2733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,2,0.1715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,4,0.1769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,131072,0.3393
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,8,0.1730
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,16,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,32,0.1716
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,64,0.1771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,128,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,256,0.1776
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,512,0.1906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,2048,0.2146
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,1024,0.2017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,4096,0.2372
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,8192,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,16384,0.2532
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,32768,0.2726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,65536,0.3305
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,131072,0.4408
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,2,0.1826
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,4,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,8,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,16,0.1875
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,32,0.1849
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,64,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,128,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,256,0.2017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,512,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,2048,0.2611
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,1024,0.2256
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,4096,0.2814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,8192,0.2942
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,16384,0.3117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,32768,0.3539
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,2,0.2186
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,65536,0.4529
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,4,0.2156
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,8,0.2154
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,16,0.2156
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,131072,0.6482
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,32,0.2190
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,64,0.2165
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,128,0.2181
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,256,0.2327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,512,0.2490
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,2048,0.3562
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,4096,0.3748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,1024,0.2828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,8192,0.4057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,16384,0.4609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,32768,0.5778
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,2,0.2688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,4,0.2681
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,8,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,65536,0.8106
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,131072,1.3662
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,16,0.2696
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,32,0.2672
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,64,0.2704
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,128,0.2726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,256,0.2920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,512,0.3220
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,2048,0.5081
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,4096,0.5655
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,1024,0.3856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,16384,0.7313
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,8192,0.6215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,32768,0.9621
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,2,0.3707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,4,0.3697
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,65536,1.4658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,8,0.3703
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,16,0.3711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,32,0.3725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,64,0.3715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,128,0.3769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,256,0.4098
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,512,0.4622
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,2048,0.8252
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,2,0.2223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,4096,0.9076
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,1024,0.5818
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,4,0.2232
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,8192,1.0133
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,32768,1.6610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,8,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,16,0.2224
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,16384,1.2365
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,32,0.2286
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,64,0.2243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,128,0.2232
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,256,0.2497
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,1024,0.2476
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,2048,0.2267
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,8192,0.2447
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,32768,0.2412
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,16384,0.2383
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,4096,0.2725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,512,0.2166
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,65536,0.2620
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,131072,0.2798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,4,0.2327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,2,0.2587
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,8,0.2370
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,16,0.2618
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,32,0.2374
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,64,0.2350
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,128,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,256,0.2289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,512,0.2298
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,2048,0.2307
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,4096,0.2528
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,1024,0.2371
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,8192,0.2556
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,16384,0.2425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,32768,0.2597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,131072,0.2940
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,2,0.2437
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,65536,0.2696
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,4,0.2529
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,8,0.2493
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,16,0.2504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,32,0.2448
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,64,0.2453
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,128,0.2505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,256,0.2429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,512,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,1024,0.2546
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,2048,0.2512
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,4096,0.2627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,8192,0.2702
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,16384,0.2669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,32768,0.2652
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,65536,0.2847
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,131072,0.3177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,2,0.2674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,4,0.2564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,8,0.2635
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,16,0.2602
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,32,0.2668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,128,0.2559
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,64,0.2563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,256,0.2577
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,512,0.2668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,1024,0.2571
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,2048,0.2593
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,4096,0.2801
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,8192,0.2863
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,16384,0.2746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,32768,0.2798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,65536,0.3036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,131072,0.3511
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,2,0.2782
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,4,0.2698
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,8,0.2750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,16,0.2724
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,32,0.2773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,64,0.2710
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,128,0.2714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,256,0.2795
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,512,0.2751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,1024,0.2716
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,2048,0.2764
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,4096,0.2901
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,8192,0.2946
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,16384,0.2975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,32768,0.3002
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,65536,0.3373
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,131072,0.3836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,2,0.3024
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,4,0.2989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,8,0.2993
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,16,0.2996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,32,0.3039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,64,0.2982
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,128,0.3061
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,256,0.2981
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,512,0.3032
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,1024,0.3044
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,2048,0.3033
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,4096,0.3175
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,8192,0.3245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,16384,0.3245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,32768,0.3458
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,65536,0.3799
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,131072,0.4506
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,2,0.3581
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,4,0.3588
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,8,0.3579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,16,0.3564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,32,0.3562
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,64,0.3615
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,128,0.3542
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,256,0.3556
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,1024,0.3625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,512,0.3573
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,2048,0.3619
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,4096,0.3784
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,8192,0.3903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,16384,0.3920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,65536,0.4763
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,32768,0.4181
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,2,0.4549
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,131072,0.5888
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,8,0.4504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,4,0.4512
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,16,0.4553
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,32,0.4490
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,64,0.4500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,128,0.4547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,256,0.4496
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,512,0.4509
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,1024,0.4560
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,2048,0.4557
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,4096,0.4792
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,16384,0.5081
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,32768,0.5589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,65536,0.6504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,8192,0.4921
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,131072,0.8460
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,2,0.6849
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,4,0.6864
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,8,0.6913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,16,0.6823
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,32,0.6893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,64,0.6895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,128,0.6960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,256,0.6961
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,512,0.6973
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,1024,0.7020
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,2048,0.7139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,4096,0.7304
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,8192,0.7624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,16384,0.8150
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,32768,0.9359
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,65536,1.1661
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,2,1.1496
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,4,1.1436
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,131072,1.7231
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,8,1.1438
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,16,1.1470
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,32,1.1477
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,64,1.1547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,128,1.1593
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,256,1.1622
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,512,1.1623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,1024,1.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,4096,1.2363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,8192,1.2893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,2048,1.1835
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,16384,1.4032
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,32768,1.6349
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,2,1.9863
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,4,1.9856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,8,1.9876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,65536,2.1429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,16,1.9913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,32,1.9936
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,64,1.9996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,128,1.9966
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,256,2.0050
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,512,2.0108
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,2048,2.0560
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,4096,2.1400
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,1024,2.0248
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,2,0.1773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,8192,2.2496
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,4,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,32768,2.8863
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,8,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,16,0.1779
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,16384,2.4561
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,32,0.1894
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,64,0.2027
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,128,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,256,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,512,0.1827
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,1024,0.1781
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,2048,0.1821
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,4096,0.2060
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,8192,0.2090
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,32768,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,16384,0.2204
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,65536,0.2136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,131072,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,2,0.1885
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,8,0.1982
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,4,0.2112
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,32,0.1992
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,16,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,64,0.1980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,128,0.1889
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,256,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,512,0.1985
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,1024,0.1904
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,2048,0.1989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,4096,0.2155
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,16384,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,32768,0.2082
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,8192,0.2096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,131072,0.2668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,65536,0.2276
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,2,0.2051
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,4,0.2026
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,8,0.2048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,16,0.1992
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,32,0.1987
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,64,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,512,0.1979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,256,0.2006
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,128,0.1991
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,1024,0.2065
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,2048,0.2042
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,4096,0.2137
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,8192,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,16384,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,32768,0.2229
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,65536,0.2401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,131072,0.2696
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,2,0.2116
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,4,0.2040
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,16,0.2112
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,32,0.2135
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,8,0.2046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,64,0.2046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,128,0.2101
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,256,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,512,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,1024,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,2048,0.2058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,4096,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,8192,0.2285
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,32768,0.2317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,65536,0.2527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,131072,0.2958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,16384,0.2206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,2,0.2255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,4,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,8,0.2284
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,32,0.2213
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,16,0.2190
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,64,0.2210
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,128,0.2255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,256,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,512,0.2248
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,2048,0.2178
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,1024,0.2284
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,4096,0.2383
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,16384,0.2429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,8192,0.2422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,32768,0.2477
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,65536,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,2,0.2427
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,4,0.2341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,8,0.2377
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,131072,0.3323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,16,0.2401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,32,0.2349
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,64,0.2354
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,128,0.2345
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,256,0.2436
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,512,0.2347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,1024,0.2381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,2048,0.2368
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,4096,0.2576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,8192,0.2587
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,16384,0.2634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,32768,0.2737
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,65536,0.3216
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,131072,0.3835
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,2,0.2649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,4,0.2650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,8,0.2678
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,16,0.2662
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,32,0.2650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,64,0.2701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,128,0.2688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,256,0.2664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,512,0.2647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,1024,0.2647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,2048,0.2703
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,4096,0.2869
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,8192,0.2996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,16384,0.2998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,32768,0.3237
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,65536,0.3827
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,2,0.3161
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,131072,0.4956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,4,0.3191
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,8,0.3180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,16,0.3187
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,32,0.3169
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,64,0.3163
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,128,0.3162
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,256,0.3146
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,512,0.3200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,1024,0.3202
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,2048,0.3232
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,4096,0.3421
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,8192,0.3552
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,16384,0.3691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,32768,0.4178
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,65536,0.5173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,2,0.4490
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,131072,0.7186
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,4,0.4456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,8,0.4432
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,16,0.4440
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,32,0.4451
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,64,0.4452
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,128,0.4436
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,256,0.4501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,512,0.4463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,1024,0.4513
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,2048,0.4595
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,4096,0.4824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,8192,0.5111
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,16384,0.5726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,32768,0.6864
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,65536,0.9161
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,4,0.6878
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,2,0.6908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,8,0.6904
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,16,0.6913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,32,0.6922
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,64,0.6962
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,131072,1.4767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,128,0.7024
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,256,0.7051
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,512,0.7065
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,1024,0.7130
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,2048,0.7270
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,16384,0.9417
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,4096,0.7770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,8192,0.8311
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,32768,1.1693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,2,1.1555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,4,1.1571
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,8,1.1561
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,16,1.1597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,65536,1.6711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,32,1.1610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,64,1.1616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,128,1.1635
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,256,1.1679
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,2048,1.2163
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,512,1.1771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,4096,1.3006
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,2,0.1634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,1024,1.1861
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,8,0.1621
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,4,0.1713
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,32768,2.0549
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,16,0.1628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,32,0.1679
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,16384,1.6246
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,64,0.1703
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,8192,1.4113
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,128,0.1628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,256,0.1630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,512,0.1683
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,1024,0.1735
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,2048,0.1690
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,8192,0.1820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,16384,0.1804
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,4096,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,32768,0.1800
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,65536,0.2095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,131072,0.2239
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,2,0.1828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,8,0.1826
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,4,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,16,0.1756
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,32,0.1861
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,64,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,256,0.1809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,512,0.1741
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,1024,0.1766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,4096,0.1994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,2048,0.1722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,128,0.1767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,8192,0.2034
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,16384,0.1916
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,32768,0.2018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,65536,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,131072,0.2480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,2,0.1833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,4,0.1828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,8,0.1848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,16,0.1911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,32,0.1918
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,64,0.1836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,128,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,256,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,512,0.1850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,1024,0.1926
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,2048,0.1857
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,4096,0.2009
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,16384,0.2040
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,8192,0.2067
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,32768,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,65536,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,131072,0.2596
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,2,0.1988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,4,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,16,0.1978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,8,0.1932
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,32,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,64,0.1992
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,128,0.1917
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,256,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,512,0.1947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,1024,0.1920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,2048,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,4096,0.2150
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,16384,0.2158
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,32768,0.2169
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,65536,0.2385
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,131072,0.2808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,8192,0.2123
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,2,0.2034
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,4,0.2126
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,8,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,16,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,32,0.2098
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,128,0.2023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,64,0.2054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,512,0.2113
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,256,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,1024,0.2096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,2048,0.2061
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,4096,0.2238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,8192,0.2278
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,32768,0.2424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,65536,0.2634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,16384,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,131072,0.3188
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,2,0.2232
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,4,0.2243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,8,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,64,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,32,0.2190
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,128,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,16,0.2185
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,256,0.2225
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,512,0.2202
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,1024,0.2244
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,2048,0.2289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,4096,0.2363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,8192,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,16384,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,32768,0.2634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,65536,0.3001
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,2,0.2494
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,4,0.2464
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,131072,0.3751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,8,0.2471
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,16,0.2495
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,64,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,128,0.2488
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,256,0.2517
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,512,0.2507
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,32,0.2477
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,1024,0.2490
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,2048,0.2503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,16384,0.2853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,4096,0.2689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,8192,0.2832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,32768,0.3073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,65536,0.3708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,2,0.2926
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,131072,0.4773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,4,0.2931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,8,0.2921
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,16,0.2966
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,32,0.2926
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,64,0.2920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,128,0.2972
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,256,0.2966
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,512,0.2961
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,1024,0.2958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,4096,0.3255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,2048,0.2977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,16384,0.3488
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,8192,0.3370
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,32768,0.3968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,65536,0.4952
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,131072,0.6893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,2,0.4118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,4,0.4097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,8,0.4114
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,16,0.4079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,32,0.4083
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,128,0.4138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,64,0.4089
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,256,0.4151
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,1024,0.4147
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,512,0.4116
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,2048,0.4235
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,4096,0.4508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,32768,0.6500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,8192,0.4779
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,16384,0.5318
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,65536,0.8853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,131072,1.4419
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,4,0.6187
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,2,0.6203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,8,0.6187
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,16,0.6236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,32,0.6233
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,64,0.6288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,128,0.6307
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,256,0.6291
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,512,0.6311
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,2048,0.6530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,1024,0.6410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,4096,0.7065
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,8192,0.7597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,16384,0.8710
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,32768,1.1024
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,4,1.0284
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,2,1.0268
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,65536,1.6069
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,8,1.0290
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,16,1.0336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,32,1.0375
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,64,1.0423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,128,1.0420
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,256,1.0428
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,512,1.0492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,2048,1.0883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,2,0.1676
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,4096,1.1771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,1024,1.0663
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,8,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,16,0.1604
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,4,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,32,0.1603
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,16384,1.4964
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,64,0.1609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,8192,1.2848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,128,0.1569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,256,0.1615
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,512,0.1632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,1024,0.1560
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,32768,1.9207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,2048,0.1639
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,4096,0.1747
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,8192,0.1870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,16384,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,32768,0.1763
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,65536,0.2050
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,131072,0.2235
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,2,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,4,0.1795
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,8,0.1687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,16,0.1687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,32,0.1730
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,64,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,128,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,256,0.1786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,512,0.1701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,2048,0.1710
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,4096,0.1867
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,8192,0.1998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,1024,0.1732
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,32768,0.1978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,65536,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,2,0.1758
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,131072,0.2387
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,16384,0.1889
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,4,0.1871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,8,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,16,0.1787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,32,0.1846
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,64,0.1774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,128,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,256,0.1881
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,512,0.1787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,2048,0.1839
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,1024,0.1773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,4096,0.1998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,8192,0.1974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,16384,0.2020
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,65536,0.2239
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,32768,0.1971
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,2,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,4,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,8,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,16,0.1868
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,32,0.1865
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,64,0.1865
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,131072,0.2478
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,128,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,256,0.1932
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,512,0.1844
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,1024,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,2048,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,4096,0.2030
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,16384,0.2098
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,32768,0.2153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,65536,0.2303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,131072,0.2730
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,2,0.2009
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,8192,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,4,0.1991
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,8,0.1944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,16,0.2052
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,32,0.2009
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,64,0.1941
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,128,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,256,0.2005
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,512,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,1024,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,4096,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,2048,0.2011
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,8192,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,16384,0.2136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,32768,0.2236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,65536,0.2586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,131072,0.3085
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,2,0.2088
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,4,0.2155
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,8,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,32,0.2106
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,16,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,64,0.2152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,256,0.2082
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,512,0.2135
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,1024,0.2113
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,2048,0.2104
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,128,0.2098
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,4096,0.2302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,8192,0.2427
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,16384,0.2381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,32768,0.2488
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,65536,0.2934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,131072,0.3581
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,2,0.2353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,4,0.2395
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,8,0.2360
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,16,0.2360
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,32,0.2397
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,64,0.2338
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,128,0.2390
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,256,0.2378
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,512,0.2372
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,1024,0.2372
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,2048,0.2378
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,4096,0.2573
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,8192,0.2688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,16384,0.2733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,32768,0.2975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,65536,0.3579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,131072,0.4665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,2,0.2752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,4,0.2761
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,8,0.2787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,16,0.2789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,32,0.2756
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,64,0.2756
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,128,0.2768
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,256,0.2759
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,512,0.2818
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,1024,0.2816
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,2048,0.2850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,4096,0.3025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,16384,0.3313
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,8192,0.3180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,32768,0.3786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,65536,0.4774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,131072,0.6749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,4,0.3825
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,2,0.3836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,8,0.3810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,16,0.3808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,32,0.3822
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,64,0.3841
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,128,0.3827
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,256,0.3864
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,512,0.3844
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,2048,0.3976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,1024,0.3882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,4096,0.4184
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,16384,0.5049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,8192,0.4482
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,32768,0.6232
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,65536,0.8575
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,2,0.5670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,131072,1.4121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,4,0.5681
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,8,0.5675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,16,0.5687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,32,0.5674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,64,0.5707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,128,0.5775
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,256,0.5759
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,512,0.5799
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,2048,0.5997
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,1024,0.5866
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,4096,0.6483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,16384,0.8126
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,32768,1.0484
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,8192,0.7041
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,2,0.9151
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,65536,1.5474
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,4,0.9170
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,8,0.9147
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,16,0.9164
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,32,0.9207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,64,0.9243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,128,0.9236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,256,0.9258
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,512,0.9346
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,2048,0.9713
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,2,0.1546
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,4096,1.0636
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,4,0.1543
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,1024,0.9508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,8192,1.1681
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,8,0.1562
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,16,0.1545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,16384,1.3856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,32,0.1522
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,32768,1.8179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,64,0.1546
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,128,0.1486
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,256,0.1538
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,512,0.1518
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,1024,0.1487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,2048,0.1555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,4096,0.1624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,8192,0.1678
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,16384,0.1693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,32768,0.1711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,65536,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,2,0.1599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,131072,0.2193
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,4,0.1623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,8,0.1553
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,16,0.1658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,32,0.1635
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,64,0.1555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,128,0.1681
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,256,0.1659
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,512,0.1564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,1024,0.1563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,2048,0.1683
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,4096,0.1794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,16384,0.1727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,32768,0.1890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,65536,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,8192,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,131072,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,2,0.1663
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,4,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,16,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,32,0.1758
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,8,0.1724
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,64,0.1625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,128,0.1665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,256,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,512,0.1746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,2048,0.1748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,1024,0.1702
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,4096,0.1921
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,8192,0.1869
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,16384,0.1936
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,32768,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,65536,0.2160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,131072,0.2504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,2,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,4,0.1742
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,8,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,16,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,32,0.1829
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,64,0.1754
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,128,0.1746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,256,0.1777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,512,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,1024,0.1870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,4096,0.2029
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,2048,0.1777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,8192,0.2076
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,16384,0.1942
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,32768,0.2051
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,65536,0.2371
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,131072,0.2638
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,2,0.1961
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,4,0.1859
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,8,0.1969
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,16,0.1882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,32,0.1994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,64,0.1920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,128,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,256,0.1887
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,512,0.1965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,1024,0.1899
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,4096,0.2178
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,8192,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,2048,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,16384,0.2101
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,32768,0.2283
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,65536,0.2501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,131072,0.2988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,2,0.2076
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,8,0.2054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,4,0.2030
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,16,0.2073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,32,0.2051
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,64,0.2018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,128,0.2100
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,256,0.2081
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,512,0.2049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,1024,0.2091
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,2048,0.2080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,4096,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,8192,0.2311
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,16384,0.2360
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,32768,0.2488
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,65536,0.2875
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,2,0.2271
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,131072,0.3536
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,4,0.2310
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,8,0.2312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,16,0.2263
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,32,0.2316
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,64,0.2289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,128,0.2270
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,256,0.2342
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,512,0.2302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,1024,0.2336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,2048,0.2316
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,4096,0.2504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,8192,0.2630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,16384,0.2645
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,32768,0.2965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,65536,0.3516
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,2,0.2652
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,131072,0.4655
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,4,0.2650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,16,0.2647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,8,0.2684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,32,0.2676
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,64,0.2701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,128,0.2667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,256,0.2668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,512,0.2709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,2048,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,4096,0.2975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,16384,0.3238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,8192,0.3101
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,32768,0.3697
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,1024,0.2743
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,65536,0.4711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,2,0.3702
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,131072,0.6627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,4,0.3672
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,8,0.3695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,16,0.3684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,32,0.3684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,64,0.3719
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,128,0.3695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,256,0.3738
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,512,0.3717
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,2048,0.3838
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,4096,0.4056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,1024,0.3738
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,16384,0.4934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,8192,0.4379
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,32768,0.6109
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,65536,0.8471
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,2,0.5420
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,4,0.5428
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,8,0.5418
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,32,0.5432
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,64,0.5461
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,131072,1.3997
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,16,0.5424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,128,0.5525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,256,0.5505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,512,0.5531
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,2048,0.5726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,1024,0.5609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,4096,0.6235
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,16384,0.7882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,8192,0.6783
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,65536,1.5246
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,32768,1.0238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,2,0.8601
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,4,0.8573
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,8,0.8597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,16,0.8620
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,32,0.8632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,64,0.8687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,128,0.8686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,256,0.8732
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,512,0.8793
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,2048,0.9183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,1024,0.8907
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,4096,1.0075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,2,0.1468
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,16384,1.3265
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,8192,1.1088
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,4,0.1463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,8,0.1482
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,16,0.1424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,32768,1.7586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,32,0.1467
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,64,0.1448
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,128,0.1447
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,256,0.1545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,512,0.1470
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,1024,0.1521
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,2048,0.1407
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,4096,0.1648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,8192,0.1636
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,16384,0.1653
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,32768,0.1642
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,65536,0.1819
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,131072,0.2174
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,4,0.1595
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,2,0.1517
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,16,0.1547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,32,0.1616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,64,0.1490
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,8,0.1569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,128,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,256,0.1542
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,512,0.1559
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,1024,0.1523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,2048,0.1521
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,4096,0.1767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,8192,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,16384,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,32768,0.1747
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,65536,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,131072,0.2328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,2,0.1705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,4,0.1654
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,8,0.1600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,16,0.1687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,32,0.1629
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,64,0.1616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,128,0.1656
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,256,0.1614
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,512,0.1683
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,1024,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,4096,0.1763
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,2048,0.1654
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,8192,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,16384,0.1842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,32768,0.1876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,65536,0.2100
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,131072,0.2506
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,2,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,4,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,8,0.1719
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,16,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,32,0.1796
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,64,0.1691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,128,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,256,0.1760
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,512,0.1691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,1024,0.1691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,2048,0.1813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,4096,0.1908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,8192,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,16384,0.1903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,32768,0.2059
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,65536,0.2252
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,131072,0.2596
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,2,0.1880
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,4,0.1936
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,8,0.1886
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,16,0.1822
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,32,0.1884
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,64,0.1861
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,128,0.1842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,256,0.1856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,512,0.1944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,1024,0.1928
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,2048,0.1895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,4096,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,8192,0.2177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,16384,0.2173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,32768,0.2192
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,65536,0.2490
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,131072,0.2964
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,2,0.1994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,4,0.2009
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,8,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,16,0.2051
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,32,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,64,0.1988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,128,0.1981
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,256,0.2067
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,512,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,1024,0.2050
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,2048,0.2106
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,4096,0.2194
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,8192,0.2320
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,16384,0.2277
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,32768,0.2499
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,65536,0.2817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,2,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,4,0.2262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,131072,0.3570
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,16,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,8,0.2237
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,32,0.2292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,128,0.2249
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,64,0.2263
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,256,0.2252
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,512,0.2315
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,1024,0.2317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,2048,0.2292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,4096,0.2509
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,8192,0.2592
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,16384,0.2674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,65536,0.3483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,32768,0.2884
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,2,0.2644
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,131072,0.4579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,4,0.2629
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,8,0.2599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,16,0.2619
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,32,0.2635
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,64,0.2609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,128,0.2644
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,256,0.2620
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,512,0.2654
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,2048,0.2684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,1024,0.2693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,4096,0.2891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,8192,0.3056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,16384,0.3236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,32768,0.3682
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,65536,0.4695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,2,0.3605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,16,0.3608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,8,0.3605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,131072,0.6670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,32,0.3625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,4,0.3594
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,64,0.3634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,128,0.3650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,256,0.3641
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,512,0.3638
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,2048,0.3744
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,1024,0.3664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,4096,0.4007
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,16384,0.4875
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,8192,0.4273
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,32768,0.6063
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,65536,0.8359
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,2,0.5281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,4,0.5263
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,8,0.5298
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,16,0.5294
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,131072,1.3918
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,32,0.5307
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,64,0.5330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,128,0.5381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,256,0.5371
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,512,0.5388
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,2048,0.5619
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,1024,0.5496
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,4096,0.6111
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,16384,0.7789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,8192,0.6670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,2,0.8308
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,32768,1.0061
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,4,0.8331
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,8,0.8328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,16,0.8351
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,32,0.8353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,65536,1.5277
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,64,0.8403
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,128,0.8409
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,256,0.8421
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,512,0.8506
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,2048,0.8929
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,2,0.1403
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,4096,0.9818
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,4,0.1423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,1024,0.8644
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,8,0.1522
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,8192,1.0916
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,16,0.1398
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,32,0.1346
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,16384,1.3043
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,64,0.1399
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,128,0.1418
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,256,0.1501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,512,0.1338
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,1024,0.1381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,2048,0.1399
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,4096,0.1642
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,8192,0.1524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,16384,0.1576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,32768,0.1620
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,65536,0.1775
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,131072,0.2017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,32768,1.7401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,2,0.1514
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,4,0.1462
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,8,0.1552
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,16,0.1441
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,32,0.1562
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,64,0.1595
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,128,0.1460
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,256,0.1479
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,512,0.1546
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,1024,0.1546
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,2048,0.1462
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,4096,0.1707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,8192,0.1675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,16384,0.1726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,32768,0.1734
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,65536,0.1869
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,131072,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,2,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,4,0.1675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,8,0.1563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,16,0.1637
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,64,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,128,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,256,0.1642
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,512,0.1566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,32,0.1673
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,1024,0.1558
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,2048,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,8192,0.1889
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,16384,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,32768,0.1886
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,65536,0.2085
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,4096,0.1726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,131072,0.2456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,2,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,4,0.1685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,8,0.1665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,16,0.1727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,32,0.1719
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,64,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,128,0.1756
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,256,0.1676
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,512,0.1651
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,2048,0.1704
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,4096,0.1940
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,8192,0.1997
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,16384,0.1962
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,32768,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,1024,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,65536,0.2215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,131072,0.2691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,4,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,8,0.1787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,16,0.1850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,32,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,2,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,64,0.1885
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,128,0.1900
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,256,0.1810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,512,0.1826
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,1024,0.1849
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,2048,0.1848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,4096,0.2126
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,8192,0.2102
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,16384,0.2123
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,32768,0.2211
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,65536,0.2460
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,2,0.1958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,131072,0.2931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,8,0.1889
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,16,0.1971
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,4,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,32,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,64,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,128,0.1930
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,256,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,512,0.2026
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,1024,0.1958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,2048,0.2017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,4096,0.2196
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,16384,0.2254
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,65536,0.2828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,32768,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,131072,0.3544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,2,0.2201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,4,0.2211
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,8192,0.2318
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,16,0.2207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,32,0.2220
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,64,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,8,0.2224
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,128,0.2213
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,256,0.2236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,512,0.2278
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,1024,0.2301
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,2048,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,4096,0.2440
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,8192,0.2553
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,16384,0.2650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,32768,0.2868
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,65536,0.3453
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,131072,0.4586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,2,0.2608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,4,0.2565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,8,0.2572
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,16,0.2566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,32,0.2589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,64,0.2610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,128,0.2588
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,256,0.2608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,512,0.2655
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,1024,0.2633
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,2048,0.2706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,4096,0.2892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,16384,0.3222
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,8192,0.3044
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,32768,0.3629
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,65536,0.4677
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,2,0.3545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,4,0.3551
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,131072,0.6632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,8,0.3540
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,16,0.3547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,32,0.3585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,64,0.3572
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,128,0.3589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,256,0.3601
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,512,0.3591
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,2048,0.3701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,1024,0.3616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,4096,0.3971
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,16384,0.4862
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,32768,0.6000
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,8192,0.4273
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,65536,0.8356
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,2,0.5202
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,4,0.5196
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,131072,1.3879
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,8,0.5206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,16,0.5222
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,32,0.5223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,64,0.5254
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,128,0.5328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,256,0.5293
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,512,0.5327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,2048,0.5535
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,1024,0.5417
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,4096,0.6041
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,8192,0.6581
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,16384,0.7712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,32768,1.0030
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,4,0.8173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,2,0.8185
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,8,0.8186
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,16,0.8193
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,32,0.8242
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,65536,1.5023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,64,0.8269
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,128,0.8283
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,256,0.8321
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,512,0.8367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,2048,0.8772
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,1024,0.8509
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,2,0.1421
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,4096,0.9665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,4,0.1335
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,8,0.1499
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,16,0.1380
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,32768,1.7185
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,8192,1.0700
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,16384,1.2879
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,32,0.1319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,64,0.1300
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,128,0.1485
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,256,0.1378
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,512,0.1480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,1024,0.1319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,2048,0.1317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,8192,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,4096,0.1481
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,16384,0.1562
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,32768,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,65536,0.1782
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,131072,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,2,0.1530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,4,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,8,0.1425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,16,0.1480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,32,0.1423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,128,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,64,0.1480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,256,0.1466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,512,0.1521
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,1024,0.1535
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,2048,0.1480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,4096,0.1545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,8192,0.1711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,16384,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,32768,0.1626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,65536,0.1814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,131072,0.2318
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,2,0.1590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,4,0.1548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,8,0.1543
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,16,0.1610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,32,0.1541
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,64,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,128,0.1563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,256,0.1586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,512,0.1523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,1024,0.1545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,2048,0.1545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,4096,0.1654
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,8192,0.1847
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,16384,0.1735
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,32768,0.1876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,65536,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,131072,0.2456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,2,0.1597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,4,0.1621
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,8,0.1665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,16,0.1686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,64,0.1706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,32,0.1595
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,128,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,256,0.1624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,512,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,2048,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,1024,0.1661
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,4096,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,8192,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,16384,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,32768,0.2020
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,65536,0.2174
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,131072,0.2660
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,2,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,4,0.1807
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,8,0.1833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,16,0.1783
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,32,0.1848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,64,0.1742
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,128,0.1890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,256,0.1769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,512,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,2048,0.1862
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,4096,0.2053
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,8192,0.2082
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,16384,0.2130
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,32768,0.2125
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,65536,0.2413
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,1024,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,131072,0.2972
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,2,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,4,0.1977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,8,0.1952
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,16,0.1879
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,32,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,64,0.1917
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,128,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,256,0.1979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,512,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,2048,0.2026
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,4096,0.2143
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,8192,0.2205
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,16384,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,32768,0.2347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,65536,0.2812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,131072,0.3480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,2,0.2163
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,1024,0.1982
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,8,0.2205
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,4,0.2176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,16,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,32,0.2170
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,64,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,128,0.2219
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,256,0.2197
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,512,0.2243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,1024,0.2212
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,2048,0.2217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,4096,0.2403
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,8192,0.2509
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,16384,0.2588
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,32768,0.2858
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,65536,0.3451
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,131072,0.4525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,2,0.2500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,4,0.2544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,8,0.2572
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,16,0.2516
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,32,0.2525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,64,0.2573
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,128,0.2538
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,256,0.2598
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,512,0.2607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,1024,0.2600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,2048,0.2677
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,4096,0.2886
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,16384,0.3130
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,8192,0.2975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,32768,0.3595
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,65536,0.4623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,2,0.3555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,4,0.3546
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,131072,0.6605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,8,0.3497
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,16,0.3510
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,32,0.3551
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,64,0.3537
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,128,0.3567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,256,0.3585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,512,0.3592
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,1024,0.3592
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,2048,0.3669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,4096,0.3926
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,8192,0.4241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,16384,0.4803
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,32768,0.5968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,65536,0.8320
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,2,0.5164
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,131072,1.3865
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,4,0.5148
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,8,0.5144
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,16,0.5168
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,32,0.5162
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,64,0.5231
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,128,0.5257
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,256,0.5258
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,512,0.5292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,2048,0.5480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,4096,0.5986
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,1024,0.5355
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,16384,0.7675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,8192,0.6517
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,32768,0.9957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,65536,1.4984
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,2,0.8109
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,4,0.8118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,8,0.8100
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,16,0.8143
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,32,0.8153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,64,0.8209
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,128,0.8177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,256,0.8256
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,2048,0.8717
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,512,0.8305
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,4096,0.9648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,16384,1.2777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,1024,0.8441
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,8192,1.0678
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,32768,1.7066
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,2,0.2283
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,4,0.2233
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,8,0.2284
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,512,0.2286
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,32,0.2289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,16,0.2156
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,64,0.2187
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,1024,0.2301
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,2048,0.2285
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,4096,0.2498
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,256,0.2210
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,8192,0.2541
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,16384,0.2318
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,32768,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,65536,0.2690
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,128,0.2354
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,131072,0.2900
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,2,0.2341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,4,0.2237
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,8,0.2329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,16,0.2309
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,64,0.2234
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,128,0.2290
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,256,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,512,0.2268
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,1024,0.2507
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,32,0.2233
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,2048,0.2262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,4096,0.2519
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,8192,0.2589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,16384,0.2417
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,32768,0.2451
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,65536,0.2699
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,131072,0.3018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,2,0.2384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,4,0.2315
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,8,0.2382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,16,0.2306
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,32,0.2292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,64,0.2285
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,128,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,256,0.2362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,1024,0.2410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,512,0.2404
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,2048,0.2336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,4096,0.2510
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,8192,0.2549
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,16384,0.2592
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,32768,0.2552
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,131072,0.3131
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,2,0.2391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,4,0.2429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,65536,0.2753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,8,0.2394
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,16,0.2456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,32,0.2379
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,64,0.2415
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,128,0.2419
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,256,0.2366
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,512,0.2436
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,1024,0.2478
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,2048,0.2410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,4096,0.2616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,8192,0.2677
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,32768,0.2697
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,16384,0.2679
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,65536,0.2955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,131072,0.3297
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,2,0.2466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,4,0.2498
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,8,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,16,0.2548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,32,0.2548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,128,0.2509
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,64,0.2472
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,256,0.2552
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,512,0.2489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,1024,0.2520
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,2048,0.2646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,4096,0.2707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,8192,0.2794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,16384,0.2793
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,32768,0.2879
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,65536,0.3198
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,131072,0.3709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,4,0.2623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,2,0.2636
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,8,0.2614
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,16,0.2678
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,32,0.2610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,64,0.2654
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,128,0.2712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,256,0.2689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,1024,0.2651
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,512,0.2624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,2048,0.2728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,4096,0.2916
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,8192,0.3035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,32768,0.3139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,16384,0.2949
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,131072,0.4204
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,65536,0.3482
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,2,0.2875
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,4,0.2890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,16,0.2927
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,8,0.2957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,32,0.2885
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,128,0.2883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,64,0.2891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,256,0.3004
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,512,0.2896
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,1024,0.2939
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,4096,0.3241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,2048,0.3064
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,8192,0.3314
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,16384,0.3335
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,32768,0.3588
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,65536,0.4210
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,2,0.3695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,4,0.3739
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,8,0.3729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,16,0.3676
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,32,0.3712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,131072,0.5294
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,64,0.3684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,128,0.3715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,256,0.3705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,512,0.3769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,1024,0.3836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,2048,0.3947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,4096,0.4176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,8192,0.4338
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,32768,0.4913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,16384,0.4478
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,65536,0.5954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,2,0.5612
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,131072,0.7965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,4,0.5265
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,16,0.5289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,8,0.5303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,32,0.5265
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,64,0.5353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,128,0.5345
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,256,0.5409
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,512,0.5461
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,1024,0.5624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,2048,0.5867
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,4096,0.6235
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,8192,0.6484
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,16384,0.7036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,32768,0.8236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,65536,1.0521
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,2,0.8061
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,4,0.8071
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,8,0.8081
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,131072,1.6083
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,16,0.8548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,32,0.8062
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,64,0.8079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,128,0.8715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,256,0.8641
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,512,0.8334
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,1024,0.8693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,2048,0.9088
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,4096,0.9694
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,32768,1.3733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,8192,1.0251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,16384,1.1402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,2,1.4151
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,4,1.4159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,8,1.4159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,65536,1.8974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,16,1.4191
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,32,1.4247
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,64,1.4256
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,128,1.4347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,256,1.4523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,512,1.5415
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,2,0.2096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,2048,1.6196
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,4,0.2085
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,4096,1.7276
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,1024,1.5206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,8,0.2049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,8192,1.8367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,16,0.2188
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,32,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,64,0.2128
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,16384,2.0497
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,32768,2.4753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,128,0.2095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,256,0.1994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,1024,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,2048,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,512,0.2068
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,4096,0.2167
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,32768,0.2321
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,16384,0.2146
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,65536,0.2523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,131072,0.2633
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,8192,0.2253
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,2,0.2152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,8,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,4,0.2071
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,16,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,32,0.2141
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,64,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,128,0.2165
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,256,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,1024,0.2192
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,512,0.2068
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,2048,0.2091
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,4096,0.2265
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,8192,0.2386
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,16384,0.2343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,32768,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,65536,0.2572
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,131072,0.2863
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,2,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,4,0.2260
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,8,0.2120
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,16,0.2189
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,32,0.2122
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,64,0.2158
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,128,0.2214
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,256,0.2220
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,512,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,1024,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,2048,0.2268
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,4096,0.2358
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,16384,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,8192,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,32768,0.2500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,65536,0.2590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,131072,0.2863
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,2,0.2194
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,4,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,8,0.2321
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,16,0.2307
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,32,0.2246
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,64,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,128,0.2228
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,256,0.2213
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,512,0.2289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,2048,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,1024,0.2223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,4096,0.2515
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,8192,0.2489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,16384,0.2464
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,65536,0.2855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,32768,0.2584
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,131072,0.3122
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,2,0.2278
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,8,0.2306
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,16,0.2275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,64,0.2334
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,128,0.2332
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,4,0.2369
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,256,0.2290
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,32,0.2362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,1024,0.2362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,512,0.2312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,2048,0.2371
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,4096,0.2537
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,8192,0.2587
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,16384,0.2615
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,32768,0.2688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,65536,0.3049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,131072,0.3491
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,2,0.2428
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,4,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,8,0.2404
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,16,0.2477
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,32,0.2448
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,64,0.2429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,128,0.2444
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,256,0.2450
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,512,0.2425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,2048,0.2588
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,1024,0.2459
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,4096,0.2716
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,8192,0.2804
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,16384,0.2723
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,32768,0.2890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,65536,0.3309
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,131072,0.3971
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,2,0.2656
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,4,0.2610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,8,0.2725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,32,0.2606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,16,0.2613
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,64,0.2647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,256,0.2662
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,128,0.2601
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,512,0.2630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,1024,0.2687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,2048,0.2733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,4096,0.2923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,8192,0.3081
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,16384,0.3066
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,32768,0.3336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,65536,0.3902
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,2,0.3330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,131072,0.5015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,4,0.3223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,8,0.3427
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,16,0.3210
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,32,0.3275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,64,0.3205
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,128,0.3412
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,256,0.3410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,512,0.3288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,1024,0.3333
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,2048,0.3469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,4096,0.3705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,8192,0.3867
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,16384,0.4007
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,32768,0.4464
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,65536,0.5479
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,131072,0.7479
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,2,0.4436
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,4,0.4583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,8,0.4461
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,16,0.4449
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,64,0.4444
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,32,0.4451
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,128,0.4468
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,256,0.4526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,512,0.4770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,1024,0.4800
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,2048,0.4971
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,4096,0.5308
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,16384,0.6154
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,8192,0.5601
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,32768,0.7363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,65536,0.9699
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,2,0.6668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,4,0.6642
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,8,0.6643
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,131072,1.5255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,16,0.6683
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,32,0.7141
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,64,0.6758
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,128,0.6816
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,256,0.7362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,512,0.6984
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,1024,0.7281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,16384,1.0031
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,2048,0.7773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,4096,0.8368
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,8192,0.8909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,32768,1.2324
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,2,1.1536
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,65536,1.7424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,4,1.1539
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,8,1.3115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,16,1.1576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,32,1.1654
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,64,1.1687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,128,1.1678
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,256,1.2225
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,512,1.2066
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,2,0.1869
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,2048,1.3575
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,4,0.1961
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,4096,1.4627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,16384,1.7826
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,1024,1.2561
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,8,0.1829
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,8192,1.5725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,16,0.1967
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,32,0.1825
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,64,0.1981
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,128,0.1863
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,256,0.1856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,512,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,1024,0.2001
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,4096,0.2185
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,32768,2.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,8192,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,2048,0.1957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,16384,0.2026
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,32768,0.2206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,65536,0.2224
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,131072,0.2619
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,2,0.1997
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,4,0.2021
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,8,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,16,0.2011
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,32,0.1968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,64,0.1990
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,128,0.2006
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,256,0.1928
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,512,0.1917
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,1024,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,4096,0.2182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,2048,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,8192,0.2105
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,16384,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,32768,0.2263
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,65536,0.2429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,131072,0.2678
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,2,0.2012
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,8,0.1987
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,16,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,32,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,4,0.2047
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,64,0.2096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,128,0.2006
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,256,0.1974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,512,0.2109
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,1024,0.2019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,2048,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,4096,0.2288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,8192,0.2330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,16384,0.2196
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,32768,0.2329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,65536,0.2579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,131072,0.2793
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,2,0.2127
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,4,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,8,0.2070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,16,0.2136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,32,0.2176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,64,0.2057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,128,0.2096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,256,0.2155
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,512,0.2132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,1024,0.2099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,2048,0.2235
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,4096,0.2363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,8192,0.2442
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,16384,0.2333
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,32768,0.2478
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,65536,0.2625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,131072,0.3149
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,2,0.2150
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,4,0.2201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,8,0.2171
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,64,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,32,0.2136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,16,0.2218
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,128,0.2165
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,256,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,512,0.2198
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,2048,0.2257
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,1024,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,4096,0.2417
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,8192,0.2581
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,16384,0.2516
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,32768,0.2607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,65536,0.2858
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,131072,0.3342
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,2,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,4,0.2251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,8,0.2303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,16,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,32,0.2286
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,64,0.2272
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,128,0.2257
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,256,0.2238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,512,0.2353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,1024,0.2326
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,2048,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,4096,0.2590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,8192,0.2602
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,16384,0.2620
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,65536,0.3182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,131072,0.3884
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,32768,0.2838
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,2,0.2515
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,4,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,8,0.2384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,16,0.2423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,32,0.2403
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,64,0.2425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,128,0.2520
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,256,0.2424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,512,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,1024,0.2489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,2048,0.2565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,4096,0.2785
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,8192,0.2856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,16384,0.2885
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,32768,0.3117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,65536,0.3737
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,131072,0.4829
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,2,0.3111
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,4,0.3013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,8,0.2958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,16,0.2909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,32,0.2927
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,128,0.2949
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,64,0.2923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,256,0.2972
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,512,0.2978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,2048,0.3176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,1024,0.3078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,4096,0.3408
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,8192,0.3600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,16384,0.3687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,32768,0.4154
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,65536,0.5160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,2,0.3908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,131072,0.7043
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,4,0.3895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,8,0.3914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,16,0.3898
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,32,0.3908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,64,0.3891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,128,0.3949
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,256,0.3965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,512,0.4018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,2048,0.4427
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,1024,0.4145
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,4096,0.4723
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,16384,0.5581
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,8192,0.5010
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,32768,0.6785
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,65536,0.9139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,2,0.5578
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,4,0.5572
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,131072,1.4643
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,16,0.5964
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,8,0.6049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,32,0.5577
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,64,0.5602
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,128,0.5648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,256,0.6057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,512,0.5849
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,2048,0.6589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,1024,0.6095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,4096,0.7184
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,8192,0.7749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,16384,0.8870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,32768,1.1151
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,65536,1.6370
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,2,0.9312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,4,0.9382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,8,0.9423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,16,1.0340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,64,0.9565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,128,0.9569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,32,0.9474
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,256,1.0630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,512,0.9973
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,2048,1.1423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,1024,1.0476
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,2,0.1740
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,4096,1.2507
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,16384,1.5688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,8192,1.3567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,8,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,16,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,4,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,32,0.1703
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,32768,1.9946
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,64,0.1743
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,128,0.1603
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,256,0.1581
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,512,0.1670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,1024,0.1731
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,2048,0.1643
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,4096,0.1937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,16384,0.1771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,8192,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,32768,0.1919
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,65536,0.2054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,131072,0.2400
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,4,0.1616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,8,0.1654
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,16,0.1628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,2,0.1748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,32,0.1650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,64,0.1711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,128,0.1687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,256,0.1696
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,512,0.1626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,1024,0.1648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,2048,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,4096,0.1957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,8192,0.1854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,16384,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,32768,0.1947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,65536,0.2137
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,131072,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,2,0.1711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,4,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,8,0.1695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,16,0.1707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,32,0.1792
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,64,0.1767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,128,0.1773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,256,0.1887
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,512,0.1736
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,1024,0.1738
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,2048,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,4096,0.1994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,8192,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,16384,0.1985
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,32768,0.1992
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,65536,0.2361
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,131072,0.2582
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,2,0.1824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,4,0.1936
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,8,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,16,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,32,0.1897
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,64,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,128,0.1821
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,256,0.1807
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,512,0.1833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,1024,0.1973
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,2048,0.2008
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,4096,0.2095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,8192,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,16384,0.2060
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,32768,0.2183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,65536,0.2456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,131072,0.2954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,2,0.1918
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,4,0.1981
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,8,0.1958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,16,0.1895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,32,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,64,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,128,0.1944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,256,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,512,0.1957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,1024,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,2048,0.2080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,4096,0.2292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,8192,0.2319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,16384,0.2270
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,32768,0.2393
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,65536,0.2751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,131072,0.3170
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,2,0.2070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,4,0.1971
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,8,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,16,0.1971
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,32,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,64,0.2012
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,128,0.2027
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,256,0.2014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,512,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,1024,0.2152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,2048,0.2244
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,4096,0.2401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,8192,0.2568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,16384,0.2508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,32768,0.2673
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,65536,0.3057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,131072,0.3723
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,2,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,8,0.2160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,4,0.2084
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,16,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,32,0.2119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,64,0.2093
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,256,0.2185
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,512,0.2255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,128,0.2217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,2048,0.2497
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,1024,0.2308
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,4096,0.2726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,8192,0.2786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,16384,0.2855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,32768,0.3099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,65536,0.3654
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,2,0.2227
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,4,0.2284
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,8,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,131072,0.4786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,32,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,16,0.2227
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,64,0.2276
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,128,0.2269
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,256,0.2424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,512,0.2452
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,1024,0.2668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,2048,0.2941
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,4096,0.3189
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,8192,0.3338
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,16384,0.3500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,32768,0.3916
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,65536,0.4967
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,131072,0.6841
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,2,0.2658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,4,0.2623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,8,0.2623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,16,0.2646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,32,0.2665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,64,0.2656
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,128,0.2707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,256,0.2776
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,2048,0.4011
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,512,0.2977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,1024,0.3348
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,4096,0.4255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,16384,0.5123
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,8192,0.4588
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,32768,0.6316
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,65536,0.8562
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,2,0.3378
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,16,0.3364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,4,0.3354
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,8,0.3335
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,32,0.3381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,64,0.3360
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,131072,1.4182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,128,0.3400
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,256,0.3615
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,512,0.3924
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,2048,0.5843
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,4096,0.6388
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,1024,0.4578
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,16384,0.8068
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,8192,0.6933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,32768,1.0388
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,2,0.4846
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,8,0.4834
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,4,0.4839
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,16,0.4837
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,65536,1.5337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,32,0.4833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,64,0.4846
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,128,0.4909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,256,0.5264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,2048,0.9519
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,16384,1.3589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,512,0.5847
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,4096,1.0399
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,2,0.1686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,1024,0.7072
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,8192,1.1500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,32768,1.7880
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,4,0.1654
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,16,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,8,0.1698
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,32,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,64,0.1545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,128,0.1524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,256,0.1648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,512,0.1665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,1024,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,2048,0.1688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,4096,0.1733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,8192,0.1813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,16384,0.1725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,32768,0.1756
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,131072,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,2,0.1725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,4,0.1633
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,65536,0.2053
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,8,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,16,0.1564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,32,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,64,0.1723
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,256,0.1726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,512,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,128,0.1579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,1024,0.1724
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,2048,0.1612
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,4096,0.1876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,8192,0.1911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,16384,0.1849
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,32768,0.1798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,65536,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,131072,0.2347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,2,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,4,0.1630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,8,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,16,0.1768
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,32,0.1628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,64,0.1726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,128,0.1809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,256,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,512,0.1781
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,1024,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,2048,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,4096,0.1939
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,8192,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,16384,0.2026
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,32768,0.2029
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,65536,0.2121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,131072,0.2638
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,2,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,4,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,8,0.1748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,16,0.1859
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,32,0.1794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,64,0.1746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,128,0.1732
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,256,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,512,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,1024,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,2048,0.1859
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,4096,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,8192,0.2023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,32768,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,65536,0.2504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,131072,0.2927
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,16384,0.2144
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,2,0.1838
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,4,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,8,0.1867
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,16,0.1882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,32,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,64,0.1966
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,128,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,256,0.1853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,512,0.1962
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,1024,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,4096,0.2165
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,8192,0.2247
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,16384,0.2285
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,32768,0.2350
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,65536,0.2651
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,2048,0.2002
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,131072,0.3245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,2,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,4,0.1965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,32,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,8,0.1941
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,16,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,64,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,128,0.2009
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,256,0.1983
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,1024,0.2024
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,2048,0.2185
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,4096,0.2415
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,8192,0.2470
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,16384,0.2431
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,512,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,65536,0.2989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,131072,0.3669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,32768,0.2676
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,2,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,4,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,8,0.2012
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,16,0.2019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,32,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,64,0.2098
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,128,0.2012
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,256,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,512,0.2198
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,1024,0.2312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,2048,0.2430
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,4096,0.2628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,8192,0.2732
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,16384,0.2770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,32768,0.3045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,65536,0.3609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,131072,0.4778
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,2,0.2184
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,4,0.2146
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,8,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,16,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,32,0.2137
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,128,0.2232
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,256,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,512,0.2389
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,1024,0.2527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,64,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,2048,0.2868
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,8192,0.3240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,16384,0.3380
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,4096,0.3146
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,32768,0.3870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,65536,0.4855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,131072,0.6735
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,2,0.2507
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,4,0.2472
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,8,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,16,0.2484
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,32,0.2534
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,64,0.2558
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,128,0.2558
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,256,0.2694
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,512,0.2851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,1024,0.3183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,2048,0.3881
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,4096,0.4103
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,8192,0.4433
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,16384,0.4990
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,65536,0.8451
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,32768,0.6176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,4,0.3121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,2,0.3116
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,131072,1.4033
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,8,0.3134
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,16,0.3161
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,32,0.3156
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,64,0.3153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,128,0.3221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,256,0.3362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,2048,0.5641
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,512,0.3670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,4096,0.6131
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,1024,0.4325
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,8192,0.6707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,32768,1.0137
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,16384,0.7868
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,65536,1.5134
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,2,0.4397
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,4,0.4380
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,16,0.4382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,8,0.4378
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,32,0.4396
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,64,0.4400
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,128,0.4462
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,256,0.4815
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,512,0.5347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,2048,0.8979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,2,0.1623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,4096,0.9840
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,4,0.1489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,16384,1.3070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,8192,1.0937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,16,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,8,0.1525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,1024,0.6519
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,64,0.1521
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,32,0.1483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,128,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,256,0.1545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,32768,1.7341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,512,0.1604
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,1024,0.1537
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,2048,0.1508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,4096,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,8192,0.1865
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,16384,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,32768,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,65536,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,131072,0.2125
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,4,0.1648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,8,0.1624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,16,0.1586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,2,0.1697
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,32,0.1545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,64,0.1523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,128,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,256,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,512,0.1648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,1024,0.1625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,2048,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,4096,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,8192,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,16384,0.1809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,32768,0.1748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,65536,0.2170
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,2,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,8,0.1624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,4,0.1689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,16,0.1742
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,131072,0.2307
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,64,0.1628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,256,0.1623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,128,0.1686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,32,0.1607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,512,0.1716
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,1024,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,2048,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,4096,0.1916
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,8192,0.1909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,16384,0.1805
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,32768,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,65536,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,131072,0.2592
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,2,0.1849
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,4,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,8,0.1758
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,16,0.1746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,32,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,64,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,128,0.1864
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,256,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,512,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,1024,0.1769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,2048,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,8192,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,16384,0.2098
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,32768,0.2120
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,4096,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,65536,0.2403
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,131072,0.2787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,2,0.1928
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,4,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,8,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,16,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,32,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,64,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,128,0.1802
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,256,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,512,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,1024,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,2048,0.1925
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,4096,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,8192,0.2214
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,16384,0.2305
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,32768,0.2345
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,65536,0.2616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,131072,0.3181
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,2,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,4,0.1882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,8,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,16,0.1976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,32,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,64,0.1922
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,128,0.1890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,256,0.1944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,512,0.1972
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,2048,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,4096,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,8192,0.2402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,16384,0.2422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,1024,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,32768,0.2592
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,65536,0.3015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,131072,0.3685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,2,0.1976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,4,0.1981
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,8,0.1980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,16,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,32,0.1957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,64,0.2026
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,128,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,256,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,512,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,1024,0.2236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,2048,0.2464
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,4096,0.2606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,8192,0.2709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,16384,0.2725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,32768,0.3019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,65536,0.3619
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,131072,0.4690
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,2,0.2149
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,4,0.2109
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,8,0.2120
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,16,0.2133
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,32,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,64,0.2146
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,128,0.2201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,256,0.2271
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,512,0.2337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,1024,0.2500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,2048,0.2837
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,4096,0.3080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,8192,0.3238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,16384,0.3407
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,32768,0.3844
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,65536,0.4848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,2,0.2407
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,131072,0.6713
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,4,0.2432
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,8,0.2428
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,16,0.2483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,32,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,64,0.2484
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,128,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,256,0.2651
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,512,0.2776
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,1024,0.3196
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,2048,0.3769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,4096,0.4047
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,16384,0.4967
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,8192,0.4355
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,32768,0.6072
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,2,0.3022
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,65536,0.8384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,4,0.3027
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,16,0.3063
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,32,0.3033
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,131072,1.3951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,64,0.3048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,8,0.3029
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,128,0.3071
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,256,0.3311
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,512,0.3569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,2048,0.5459
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,1024,0.4216
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,4096,0.6021
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,8192,0.6597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,16384,0.7735
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,32768,1.0004
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,2,0.4186
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,65536,1.5138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,4,0.4208
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,16,0.4172
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,8,0.4378
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,32,0.4192
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,64,0.4207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,128,0.4277
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,256,0.4589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,512,0.5169
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,2048,0.8801
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,1024,0.6373
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,2,0.1486
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,4096,0.9627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,4,0.1626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,16384,1.2884
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,8,0.1487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,8192,1.0657
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,32768,1.7107
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,32,0.1620
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,64,0.1482
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,16,0.1624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,128,0.1505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,256,0.1486
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,512,0.1640
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,4096,0.1628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,1024,0.1501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,8192,0.1813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,2048,0.1564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,16384,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,32768,0.1688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,65536,0.1871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,131072,0.2289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,4,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,8,0.1631
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,16,0.1555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,32,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,64,0.1668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,128,0.1665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,256,0.1525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,1024,0.1554
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,512,0.1628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,2,0.1502
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,2048,0.1566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,4096,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,8192,0.1756
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,16384,0.1809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,32768,0.1757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,65536,0.1947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,131072,0.2309
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,2,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,4,0.1657
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,8,0.1650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,16,0.1733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,32,0.1649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,64,0.1753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,128,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,256,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,512,0.1730
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,1024,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,2048,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,4096,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,8192,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,16384,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,32768,0.1927
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,65536,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,131072,0.2594
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,2,0.1731
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,4,0.1853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,8,0.1730
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,16,0.1836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,32,0.1732
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,64,0.1707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,128,0.1766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,256,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,512,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,1024,0.1751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,2048,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,4096,0.2054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,8192,0.2069
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,16384,0.1997
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,32768,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,65536,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,131072,0.2755
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,2,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,4,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,8,0.1793
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,16,0.1871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,32,0.1894
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,64,0.1797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,128,0.1810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,256,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,512,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,1024,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,2048,0.1936
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,4096,0.2167
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,8192,0.2152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,16384,0.2266
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,32768,0.2310
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,65536,0.2633
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,2,0.1870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,131072,0.3215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,4,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,8,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,16,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,32,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,64,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,128,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,256,0.1850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,512,0.1911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,1024,0.2020
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,2048,0.2201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,4096,0.2287
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,8192,0.2397
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,16384,0.2476
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,32768,0.2538
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,131072,0.3625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,2,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,4,0.1974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,8,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,16,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,32,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,128,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,64,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,65536,0.2982
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,256,0.1971
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,512,0.2107
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,1024,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,2048,0.2434
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,4096,0.2576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,8192,0.2684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,16384,0.2724
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,32768,0.2953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,65536,0.3563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,2,0.2098
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,4,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,131072,0.4701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,8,0.2079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,16,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,32,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,64,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,128,0.2126
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,256,0.2250
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,512,0.2379
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,1024,0.2542
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,2048,0.2809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,4096,0.3051
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,16384,0.3322
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,8192,0.3219
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,32768,0.3822
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,65536,0.4832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,2,0.2416
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,131072,0.6715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,4,0.2382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,8,0.2417
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,16,0.2385
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,32,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,128,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,64,0.2406
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,256,0.2575
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,512,0.2791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,1024,0.3086
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,2048,0.3779
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,4096,0.4040
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,8192,0.4315
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,16384,0.4889
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,32768,0.6065
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,65536,0.8344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,2,0.2979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,8,0.2998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,4,0.3001
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,16,0.2982
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,131072,1.3938
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,32,0.2998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,64,0.2976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,128,0.3049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,256,0.3270
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,512,0.3554
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,2048,0.5437
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,1024,0.4157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,4096,0.5958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,16384,0.7635
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,8192,0.6563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,32768,0.9960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,2,0.4082
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,4,0.4083
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,65536,1.4980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,16,0.4090
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,8,0.4090
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,32,0.4087
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,64,0.4121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,128,0.4179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,256,0.4496
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,512,0.5048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,2048,0.8700
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,16384,1.2773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,4096,0.9485
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,1024,0.6271
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,8192,1.0617
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,32768,1.7005
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,2,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,4,0.1482
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,8,0.1480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,512,0.1449
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,16,0.1577
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,2048,0.1462
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,1024,0.1542
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,256,0.1603
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,4096,0.1589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,32,0.1598
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,64,0.1452
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,8192,0.1768
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,128,0.1499
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,32768,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,65536,0.1909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,16384,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,2,0.1475
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,4,0.1657
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,131072,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,8,0.1583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,16,0.1555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,32,0.1584
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,64,0.1527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,128,0.1482
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,256,0.1665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,512,0.1483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,1024,0.1666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,2048,0.1664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,4096,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,8192,0.1657
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,16384,0.1687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,32768,0.1816
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,65536,0.1915
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,131072,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,2,0.1705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,4,0.1624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,8,0.1608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,16,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,32,0.1739
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,64,0.1589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,128,0.1566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,256,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,512,0.1644
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,1024,0.1685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,2048,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,4096,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,8192,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,16384,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,32768,0.1849
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,65536,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,131072,0.2439
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,2,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,4,0.1787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,8,0.1687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,16,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,32,0.1686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,64,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,128,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,256,0.1748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,512,0.1689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,1024,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,2048,0.1840
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,4096,0.2006
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,8192,0.1928
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,16384,0.1987
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,32768,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,65536,0.2322
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,131072,0.2764
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,2,0.1870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,4,0.1752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,8,0.1884
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,16,0.1866
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,32,0.1777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,64,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,128,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,256,0.1830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,512,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,1024,0.1821
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,2048,0.2012
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,4096,0.2039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,32768,0.2361
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,65536,0.2667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,16384,0.2148
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,131072,0.3046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,2,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,4,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,8192,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,8,0.1829
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,16,0.1870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,32,0.1932
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,64,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,128,0.1815
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,256,0.1870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,512,0.1898
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,1024,0.1928
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,2048,0.2158
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,4096,0.2383
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,8192,0.2372
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,16384,0.2341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,32768,0.2515
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,65536,0.2944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,2,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,131072,0.3602
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,4,0.1960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,8,0.1951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,16,0.1903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,32,0.1895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,64,0.1961
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,128,0.1937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,512,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,256,0.1959
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,1024,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,2048,0.2357
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,4096,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,8192,0.2709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,16384,0.2684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,32768,0.2923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,65536,0.3503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,2,0.2046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,131072,0.4637
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,4,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,8,0.2037
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,16,0.2096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,32,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,64,0.2047
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,128,0.2069
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,256,0.2228
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,512,0.2341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,1024,0.2448
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,2048,0.2779
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,4096,0.3059
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,8192,0.3159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,16384,0.3302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,32768,0.3752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,65536,0.4778
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,2,0.2351
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,4,0.2342
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,8,0.2368
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,16,0.2368
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,131072,0.6706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,32,0.2353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,64,0.2343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,128,0.2397
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,256,0.2559
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,512,0.2756
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,2048,0.3731
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,4096,0.4008
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,8192,0.4296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,16384,0.4866
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,1024,0.3088
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,32768,0.6025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,65536,0.8329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,2,0.2948
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,4,0.2905
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,8,0.2915
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,16,0.2923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,131072,1.3908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,32,0.2959
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,64,0.2940
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,128,0.3014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,256,0.3198
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,512,0.3492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,1024,0.4100
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,2048,0.5328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,4096,0.5930
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,16384,0.7626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,8192,0.6498
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,32768,0.9910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,65536,1.4926
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,2,0.4031
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,4,0.4027
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,16,0.4035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,8,0.4021
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,32,0.4036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,64,0.4075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,128,0.4102
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,256,0.4429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,512,0.4989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,2048,0.8597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,4096,0.9429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,2,0.2258
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,1024,0.6117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,4,0.2363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,16384,1.2681
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,8,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,8192,1.0538
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,32768,1.6935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,16,0.2289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,32,0.2278
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,64,0.2363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,128,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,256,0.2290
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,1024,0.2376
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,512,0.2303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,2048,0.2399
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,4096,0.2505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,8192,0.2490
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,16384,0.2489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,32768,0.2443
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,65536,0.2637
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,4,0.2454
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,131072,0.2922
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,2,0.2426
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,8,0.2360
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,32,0.2436
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,16,0.2389
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,64,0.2371
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,128,0.2325
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,256,0.2343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,512,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,1024,0.2377
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,2048,0.2447
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,8192,0.2590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,16384,0.2528
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,32768,0.2518
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,4096,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,65536,0.2827
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,131072,0.3015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,2,0.2559
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,4,0.2477
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,8,0.2516
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,16,0.2533
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,32,0.2469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,64,0.2599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,128,0.2548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,512,0.2566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,256,0.2483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,2048,0.2491
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,1024,0.2492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,4096,0.2668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,8192,0.2754
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,16384,0.2733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,32768,0.2751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,65536,0.2877
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,131072,0.3220
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,2,0.2668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,4,0.2617
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,8,0.2637
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,16,0.2596
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,32,0.2687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,64,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,128,0.2599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,256,0.2626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,512,0.2692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,2048,0.2682
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,1024,0.2609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,8192,0.2899
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,4096,0.2758
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,16384,0.2737
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,32768,0.2854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,65536,0.3045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,131072,0.3530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,2,0.2832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,4,0.2739
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,8,0.2787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,16,0.2718
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,32,0.2708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,64,0.2746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,128,0.2708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,256,0.2816
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,512,0.2731
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,1024,0.2793
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,2048,0.2795
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,8192,0.2950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,4096,0.2889
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,16384,0.2944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,65536,0.3291
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,131072,0.3795
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,2,0.3015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,4,0.2981
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,32768,0.3115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,8,0.3003
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,16,0.2951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,64,0.2960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,32,0.2999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,256,0.3038
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,128,0.2957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,512,0.3039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,1024,0.3014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,2048,0.2980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,4096,0.3171
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,16384,0.3217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,8192,0.3284
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,32768,0.3402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,65536,0.3757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,131072,0.4526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,2,0.3504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,8,0.3547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,4,0.3525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,16,0.3534
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,32,0.3505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,64,0.3509
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,256,0.3519
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,128,0.3536
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,512,0.3505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,1024,0.3557
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,2048,0.3559
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,4096,0.3753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,8192,0.3894
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,16384,0.3867
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,32768,0.4127
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,65536,0.4722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,131072,0.5827
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,2,0.4566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,4,0.4542
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,8,0.4567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,16,0.4571
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,64,0.4524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,32,0.4514
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,128,0.4537
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,256,0.4536
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,512,0.4584
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,1024,0.4589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,2048,0.4599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,4096,0.4806
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,8192,0.5008
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,16384,0.5100
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,32768,0.5557
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,65536,0.6559
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,2,0.7049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,131072,0.8495
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,4,0.7016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,8,0.7041
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,16,0.7011
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,32,0.7038
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,64,0.7077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,128,0.7095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,256,0.7181
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,512,0.7163
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,1024,0.7206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,2048,0.7290
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,4096,0.7539
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,8192,0.7807
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,16384,0.8392
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,32768,0.9626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,65536,1.1835
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,131072,1.7409
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,2,1.1430
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,8,1.1417
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,16,1.1433
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,32,1.1442
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,4,1.1416
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,64,1.1533
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,128,1.1566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,256,1.1555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,512,1.1598
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,2048,1.1848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,1024,1.1688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,4096,1.2356
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,8192,1.2872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,16384,1.4003
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,32768,1.6309
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,65536,2.1306
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,2,1.9681
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,4,1.9651
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,8,1.9663
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,16,1.9708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,32,1.9717
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,64,1.9755
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,128,1.9792
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,256,1.9812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,512,1.9907
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,2048,2.0303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,2,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,1024,2.0050
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,4096,2.1225
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,16384,2.4361
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,4,0.2046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,8,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,8192,2.2255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,16,0.1950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,32768,2.8728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,32,0.2060
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,64,0.2050
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,128,0.1956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,256,0.1956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,512,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,1024,0.1978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,4096,0.2224
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,8192,0.2278
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,16384,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,32768,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,65536,0.2309
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,2048,0.2082
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,131072,0.2670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,2,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,4,0.2088
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,8,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,16,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,64,0.2018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,128,0.2116
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,256,0.2030
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,32,0.2088
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,512,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,1024,0.2129
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,2048,0.2120
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,4096,0.2227
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,8192,0.2217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,32768,0.2286
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,16384,0.2141
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,65536,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,131072,0.2771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,2,0.2193
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,4,0.2116
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,8,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,16,0.2106
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,32,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,64,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,128,0.2148
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,256,0.2170
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,512,0.2191
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,1024,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,4096,0.2265
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,2048,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,8192,0.2360
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,16384,0.2382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,32768,0.2374
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,65536,0.2586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,131072,0.2861
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,2,0.2151
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,8,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,4,0.2190
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,16,0.2271
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,32,0.2253
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,64,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,128,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,512,0.2235
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,256,0.2181
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,1024,0.2274
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,2048,0.2193
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,4096,0.2353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,8192,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,16384,0.2411
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,32768,0.2397
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,65536,0.2653
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,131072,0.3056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,2,0.2385
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,4,0.2288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,16,0.2301
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,32,0.2361
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,64,0.2361
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,128,0.2343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,256,0.2315
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,512,0.2383
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,8,0.2270
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,1024,0.2283
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,2048,0.2364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,4096,0.2462
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,8192,0.2596
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,32768,0.2626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,16384,0.2577
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,65536,0.2900
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,131072,0.3382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,2,0.2429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,4,0.2480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,8,0.2508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,16,0.2482
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,32,0.2469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,64,0.2466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,128,0.2441
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,256,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,512,0.2519
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,1024,0.2501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,2048,0.2460
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,4096,0.2652
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,8192,0.2705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,16384,0.2692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,32768,0.2883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,65536,0.3265
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,131072,0.4042
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,2,0.2745
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,4,0.2707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,8,0.2716
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,16,0.2750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,32,0.2749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,64,0.2737
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,128,0.2790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,256,0.2729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,512,0.2784
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,1024,0.2740
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,2048,0.2762
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,4096,0.2967
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,8192,0.3050
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,16384,0.3100
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,32768,0.3339
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,65536,0.4003
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,2,0.3296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,131072,0.5051
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,4,0.3260
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,8,0.3262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,16,0.3288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,32,0.3262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,64,0.3310
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,128,0.3265
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,256,0.3321
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,512,0.3275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,2048,0.3305
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,1024,0.3282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,4096,0.3532
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,8192,0.3734
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,16384,0.3878
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,32768,0.4283
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,65536,0.5275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,2,0.4640
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,4,0.4635
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,131072,0.7385
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,8,0.4608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,16,0.4645
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,32,0.4680
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,64,0.4672
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,128,0.4648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,256,0.4662
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,2048,0.4800
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,4096,0.5019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,512,0.4657
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,1024,0.4730
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,8192,0.5335
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,16384,0.5916
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,32768,0.7078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,2,0.7002
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,4,0.6999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,65536,0.9396
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,8,0.7025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,16,0.7031
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,32,0.7055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,64,0.7089
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,131072,1.4954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,256,0.7174
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,128,0.7177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,512,0.7184
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,2048,0.7383
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,1024,0.7276
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,4096,0.7896
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,16384,0.9556
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,8192,0.8458
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,2,1.1555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,32768,1.1885
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,4,1.1602
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,8,1.1568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,65536,1.6839
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,16,1.1598
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,32,1.1630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,64,1.1664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,128,1.1652
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,256,1.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,16384,1.6289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,512,1.1779
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,2048,1.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,4096,1.3048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,2,0.1848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,1024,1.1904
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,4,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,8,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,8192,1.4135
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,16,0.1906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,64,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,32,0.1854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,128,0.1840
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,256,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,512,0.1895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,2048,0.1923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,1024,0.1854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,4096,0.2009
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,32768,2.0558
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,8192,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,16384,0.2075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,32768,0.2091
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,65536,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,131072,0.2483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,2,0.1926
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,4,0.1992
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,8,0.2019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,16,0.2017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,32,0.1988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,64,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,128,0.1937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,256,0.1915
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,512,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,1024,0.1977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,2048,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,4096,0.2198
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,8192,0.2145
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,16384,0.2120
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,32768,0.2153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,65536,0.2412
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,131072,0.2610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,2,0.2034
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,4,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,8,0.2108
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,16,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,32,0.2064
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,64,0.2059
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,128,0.2048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,256,0.2057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,512,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,1024,0.2039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,2048,0.2102
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,4096,0.2236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,8192,0.2338
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,16384,0.2217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,32768,0.2346
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,65536,0.2458
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,2,0.2145
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,4,0.2135
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,131072,0.2824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,8,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,16,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,32,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,64,0.2204
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,128,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,256,0.2130
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,512,0.2183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,1024,0.2121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,2048,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,4096,0.2295
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,8192,0.2411
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,16384,0.2312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,32768,0.2365
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,65536,0.2685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,2,0.2216
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,131072,0.2992
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,4,0.2283
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,8,0.2278
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,16,0.2207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,32,0.2250
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,64,0.2309
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,128,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,256,0.2236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,512,0.2254
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,1024,0.2292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,2048,0.2302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,4096,0.2397
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,8192,0.2548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,16384,0.2413
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,32768,0.2529
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,65536,0.2873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,131072,0.3340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,2,0.2378
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,4,0.2378
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,8,0.2425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,16,0.2341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,32,0.2344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,64,0.2345
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,128,0.2387
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,256,0.2381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,512,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,1024,0.2410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,4096,0.2548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,2048,0.2416
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,8192,0.2607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,16384,0.2636
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,32768,0.2762
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,65536,0.3243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,131072,0.3859
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,2,0.2608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,4,0.2606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,8,0.2648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,16,0.2598
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,32,0.2625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,64,0.2665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,128,0.2605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,256,0.2605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,512,0.2629
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,1024,0.2666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,2048,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,4096,0.2817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,8192,0.2947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,16384,0.3032
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,32768,0.3238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,65536,0.3834
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,2,0.3132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,8,0.3080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,4,0.3082
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,131072,0.4998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,32,0.3080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,64,0.3110
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,16,0.3118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,128,0.3085
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,256,0.3140
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,512,0.3118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,1024,0.3160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,2048,0.3161
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,4096,0.3373
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,16384,0.3670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,8192,0.3533
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,32768,0.4161
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,65536,0.5160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,131072,0.7219
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,2,0.4290
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,4,0.4300
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,8,0.4299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,16,0.4290
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,32,0.4328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,64,0.4325
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,128,0.4311
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,256,0.4353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,512,0.4345
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,1024,0.4366
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,2048,0.4455
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,4096,0.4713
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,8192,0.4970
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,16384,0.5580
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,32768,0.6752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,65536,0.9087
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,2,0.6358
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,4,0.6369
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,8,0.6379
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,16,0.6383
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,131072,1.4656
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,32,0.6391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,64,0.6456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,128,0.6508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,256,0.6495
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,512,0.6519
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,2048,0.6722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,1024,0.6652
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,4096,0.7245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,8192,0.7796
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,16384,0.8938
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,32768,1.1233
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,2,1.0461
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,4,1.0464
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,16,1.0525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,8,1.0494
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,65536,1.6219
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,32,1.0563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,64,1.0595
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,128,1.0635
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,256,1.0603
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,512,1.0671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,2048,1.1128
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,4096,1.1981
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,1024,1.0839
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,2,0.1778
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,8192,1.3043
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,16384,1.5222
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,4,0.1769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,8,0.1740
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,16,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,32,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,32768,1.9443
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,64,0.1902
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,128,0.1830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,512,0.1745
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,1024,0.1871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,2048,0.1778
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,4096,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,256,0.1746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,8192,0.2066
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,16384,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,32768,0.1993
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,65536,0.2126
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,131072,0.2504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,2,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,4,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,8,0.1921
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,16,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,32,0.1862
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,128,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,512,0.1899
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,1024,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,2048,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,256,0.1894
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,64,0.1965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,4096,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,8192,0.1998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,32768,0.2109
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,65536,0.2249
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,131072,0.2658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,16384,0.1967
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,2,0.1923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,4,0.1901
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,8,0.1965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,16,0.2006
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,32,0.2006
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,64,0.1937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,128,0.1936
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,256,0.1923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,512,0.2076
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,1024,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,2048,0.1985
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,4096,0.2158
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,8192,0.2103
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,16384,0.2163
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,32768,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,65536,0.2388
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,131072,0.2828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,2,0.2053
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,4,0.2023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,8,0.1999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,16,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,32,0.2037
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,64,0.2100
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,128,0.2017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,256,0.2039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,512,0.2099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,1024,0.2029
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,2048,0.2012
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,4096,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,8192,0.2349
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,16384,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,32768,0.2354
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,65536,0.2529
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,131072,0.2917
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,2,0.2206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,4,0.2160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,8,0.2222
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,16,0.2128
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,32,0.2177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,64,0.2193
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,128,0.2142
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,256,0.2149
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,512,0.2243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,1024,0.2160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,2048,0.2243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,4096,0.2363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,8192,0.2404
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,16384,0.2370
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,32768,0.2547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,65536,0.2778
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,131072,0.3366
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,2,0.2245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,4,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,8,0.2303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,16,0.2317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,32,0.2289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,64,0.2246
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,128,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,256,0.2246
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,512,0.2285
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,1024,0.2283
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,2048,0.2360
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,4096,0.2519
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,8192,0.2548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,16384,0.2633
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,32768,0.2751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,65536,0.3117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,131072,0.3779
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,2,0.2524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,4,0.2506
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,8,0.2499
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,16,0.2549
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,32,0.2504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,64,0.2546
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,128,0.2503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,256,0.2508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,512,0.2591
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,1024,0.2585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,2048,0.2584
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,4096,0.2741
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,8192,0.2859
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,16384,0.2899
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,32768,0.3186
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,65536,0.3798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,2,0.2955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,131072,0.4868
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,4,0.2905
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,8,0.2898
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,16,0.2909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,32,0.2917
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,64,0.2958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,128,0.2967
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,256,0.2937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,512,0.3009
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,2048,0.3010
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,1024,0.2968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,4096,0.3224
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,16384,0.3561
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,8192,0.3384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,32768,0.4018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,65536,0.4976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,2,0.4022
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,131072,0.7054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,4,0.4021
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,8,0.4046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,16,0.4029
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,32,0.4042
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,64,0.4055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,128,0.4085
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,256,0.4076
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,512,0.4099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,1024,0.4148
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,2048,0.4193
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,4096,0.4460
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,16384,0.5303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,8192,0.4741
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,32768,0.6530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,65536,0.8827
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,4,0.5863
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,2,0.5849
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,8,0.5879
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,16,0.5868
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,32,0.5887
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,131072,1.4414
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,64,0.5938
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,128,0.5980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,256,0.5964
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,512,0.6006
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,2048,0.6200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,1024,0.6083
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,16384,0.8427
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,4096,0.6737
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,8192,0.7303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,32768,1.0743
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,65536,1.5707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,2,0.9382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,16,0.9389
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,4,0.9386
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,8,0.9401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,32,0.9453
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,64,0.9459
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,128,0.9495
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,256,0.9503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,512,0.9568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,2,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,2048,0.9976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,16384,1.4114
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,4096,1.0844
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,4,0.1767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,8,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,8192,1.1951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,16,0.1808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,32,0.1766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,32768,1.8484
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,64,0.1730
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,128,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,256,0.1711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,1024,0.9763
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,512,0.1759
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,2048,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,4096,0.1813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,8192,0.1990
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,32768,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,1024,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,65536,0.2214
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,131072,0.2349
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,2,0.1855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,16384,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,4,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,8,0.1888
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,16,0.1766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,32,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,64,0.1921
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,128,0.1849
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,256,0.1871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,512,0.1814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,1024,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,2048,0.1763
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,4096,0.1993
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,8192,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,32768,0.1982
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,16384,0.2048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,65536,0.2299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,131072,0.2479
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,2,0.1898
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,4,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,8,0.2017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,16,0.1843
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,32,0.1890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,64,0.1932
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,128,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,256,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,512,0.2028
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,1024,0.1890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,2048,0.1903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,4096,0.2059
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,16384,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,32768,0.2251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,65536,0.2400
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,131072,0.2754
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,2,0.1991
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,8192,0.2075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,4,0.1961
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,8,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,16,0.2076
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,32,0.2025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,64,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,128,0.2051
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,256,0.1941
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,512,0.1957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,1024,0.1965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,2048,0.2120
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,4096,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,8192,0.2214
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,16384,0.2218
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,32768,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,65536,0.2565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,131072,0.3008
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,2,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,8,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,4,0.2088
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,16,0.2150
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,32,0.2091
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,64,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,128,0.2196
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,256,0.2173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,512,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,1024,0.2136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,2048,0.2187
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,4096,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,8192,0.2339
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,16384,0.2336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,32768,0.2568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,65536,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,131072,0.3307
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,2,0.2220
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,4,0.2208
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,8,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,16,0.2207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,32,0.2298
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,64,0.2211
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,128,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,256,0.2237
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,512,0.2248
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,1024,0.2279
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,2048,0.2279
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,4096,0.2531
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,8192,0.2527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,16384,0.2518
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,32768,0.2672
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,65536,0.3103
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,131072,0.3853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,2,0.2465
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,8,0.2443
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,4,0.2451
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,16,0.2466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,32,0.2443
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,64,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,128,0.2490
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,256,0.2470
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,512,0.2492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,1024,0.2503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,4096,0.2702
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,8192,0.2848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,16384,0.2869
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,2048,0.2524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,32768,0.3144
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,65536,0.3716
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,131072,0.4825
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,2,0.2833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,4,0.2813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,8,0.2800
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,16,0.2873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,32,0.2838
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,64,0.2817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,128,0.2818
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,256,0.2863
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,512,0.2937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,2048,0.2934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,1024,0.2950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,4096,0.3156
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,16384,0.3488
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,8192,0.3310
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,32768,0.3903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,65536,0.4966
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,131072,0.6889
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,2,0.3895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,4,0.3889
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,8,0.3904
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,16,0.3891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,32,0.3922
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,64,0.3912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,128,0.3947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,256,0.3941
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,2048,0.4066
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,512,0.3947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,1024,0.3977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,4096,0.4324
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,16384,0.5165
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,8192,0.4630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,32768,0.6400
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,65536,0.8665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,2,0.5647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,4,0.5613
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,8,0.5625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,32,0.5682
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,16,0.5650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,131072,1.4270
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,64,0.5695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,128,0.5725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,256,0.5750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,512,0.5752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,2048,0.5998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,4096,0.6493
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,1024,0.5862
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,16384,0.8166
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,8192,0.7023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,32768,1.0450
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,2,0.8840
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,4,0.8889
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,8,0.8901
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,16,0.8906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,32,0.8907
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,65536,1.5465
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,64,0.8922
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,128,0.8952
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,256,0.9008
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,2048,0.9472
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,512,0.9031
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,2,0.1769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,4096,1.0355
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,1024,0.9229
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,8192,1.1428
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,4,0.1651
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,8,0.1823
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,16384,1.3559
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,16,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,32,0.1690
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,64,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,128,0.1815
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,32768,1.7874
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,256,0.1684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,512,0.1753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,1024,0.1688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,2048,0.1747
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,4096,0.1769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,8192,0.1976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,16384,0.1771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,32768,0.1868
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,131072,0.2389
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,65536,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,4,0.1877
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,2,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,8,0.1830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,16,0.1774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,32,0.1771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,64,0.1694
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,128,0.1888
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,512,0.1719
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,256,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,2048,0.1768
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,4096,0.1949
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,8192,0.1956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,1024,0.1746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,16384,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,32768,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,65536,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,131072,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,2,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,4,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,8,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,16,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,32,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,64,0.1917
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,128,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,256,0.1859
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,512,0.1809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,1024,0.1819
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,2048,0.1943
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,8192,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,4096,0.2043
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,16384,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,32768,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,65536,0.2278
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,2,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,131072,0.2742
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,4,0.1937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,8,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,16,0.2030
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,64,0.1969
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,32,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,128,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,256,0.2058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,512,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,1024,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,2048,0.1959
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,4096,0.2112
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,8192,0.2280
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,16384,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,32768,0.2344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,65536,0.2499
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,131072,0.2862
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,2,0.2031
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,4,0.2050
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,8,0.2073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,16,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,32,0.2021
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,64,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,128,0.2116
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,256,0.2173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,512,0.2037
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,1024,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,2048,0.2058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,4096,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,8192,0.2315
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,16384,0.2384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,32768,0.2483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,65536,0.2815
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,2,0.2158
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,131072,0.3183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,4,0.2202
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,8,0.2161
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,16,0.2183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,32,0.2257
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,64,0.2228
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,128,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,256,0.2172
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,512,0.2189
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,1024,0.2202
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,2048,0.2344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,4096,0.2456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,8192,0.2549
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,16384,0.2497
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,65536,0.3035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,131072,0.3739
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,32768,0.2712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,2,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,4,0.2401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,8,0.2439
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,16,0.2418
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,32,0.2412
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,64,0.2402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,128,0.2477
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,256,0.2427
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,512,0.2469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,1024,0.2469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,2048,0.2493
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,4096,0.2722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,8192,0.2791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,16384,0.2825
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,32768,0.3142
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,65536,0.3681
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,2,0.2812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,131072,0.4801
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,4,0.2784
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,8,0.2763
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,16,0.2767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,32,0.2798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,64,0.2813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,128,0.2795
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,256,0.2800
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,512,0.2890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,1024,0.2866
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,2048,0.2906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,4096,0.3124
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,8192,0.3337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,16384,0.3453
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,32768,0.3881
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,65536,0.4867
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,131072,0.6920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,2,0.3829
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,4,0.3843
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,8,0.3831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,16,0.3818
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,32,0.3851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,64,0.3833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,128,0.3844
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,256,0.3883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,512,0.3881
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,2048,0.4014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,1024,0.3927
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,4096,0.4242
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,8192,0.4561
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,16384,0.5110
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,32768,0.6319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,65536,0.8641
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,2,0.5513
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,16,0.5506
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,32,0.5549
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,4,0.5513
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,131072,1.4301
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,8,0.5513
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,64,0.5559
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,128,0.5622
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,256,0.5621
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,512,0.5665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,1024,0.5738
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,2048,0.5871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,4096,0.6343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,16384,0.8035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,8192,0.6938
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,32768,1.0371
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,2,0.8614
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,4,0.8631
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,65536,1.5380
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,8,0.8607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,16,0.8631
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,64,0.8690
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,32,0.8685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,128,0.8721
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,256,0.8739
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,512,0.8837
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,1024,0.8947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,2048,0.9255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,2,0.1648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,16384,1.3326
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,4,0.1743
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,4096,1.0120
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,8192,1.1219
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,8,0.1648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,16,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,32,0.1745
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,64,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,128,0.1747
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,32768,1.7696
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,256,0.1684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,512,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,1024,0.1649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,2048,0.1761
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,4096,0.1889
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,8192,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,16384,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,32768,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,65536,0.2029
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,131072,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,2,0.1726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,4,0.1849
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,8,0.1858
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,16,0.1739
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,32,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,64,0.1727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,128,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,256,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,512,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,1024,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,2048,0.1739
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,4096,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,8192,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,16384,0.1833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,32768,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,65536,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,131072,0.2497
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,2,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,4,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,8,0.1849
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,16,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,32,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,64,0.1801
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,128,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,512,0.1843
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,1024,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,256,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,2048,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,4096,0.1985
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,8192,0.2140
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,16384,0.1983
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,65536,0.2304
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,131072,0.2698
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,32768,0.2132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,2,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,4,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,8,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,16,0.1928
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,32,0.1911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,64,0.1902
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,128,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,256,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,512,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,1024,0.2057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,2048,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,4096,0.2108
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,8192,0.2211
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,16384,0.2098
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,32768,0.2304
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,65536,0.2486
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,131072,0.2897
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,2,0.2114
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,4,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,8,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,16,0.2081
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,32,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,64,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,128,0.2054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,256,0.2140
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,512,0.2049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,1024,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,2048,0.2058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,4096,0.2318
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,8192,0.2254
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,16384,0.2370
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,32768,0.2491
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,65536,0.2677
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,131072,0.3224
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,2,0.2145
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,4,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,8,0.2133
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,32,0.2136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,64,0.2223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,128,0.2204
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,256,0.2246
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,512,0.2193
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,1024,0.2183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,2048,0.2220
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,16,0.2137
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,4096,0.2381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,8192,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,16384,0.2524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,32768,0.2688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,65536,0.3051
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,131072,0.3706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,2,0.2361
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,4,0.2364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,8,0.2432
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,16,0.2402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,32,0.2429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,64,0.2398
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,128,0.2459
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,256,0.2390
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,512,0.2402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,1024,0.2455
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,2048,0.2523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,4096,0.2668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,8192,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,16384,0.2803
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,32768,0.3121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,65536,0.3659
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,2,0.2777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,131072,0.4737
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,4,0.2777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,8,0.2729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,16,0.2741
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,32,0.2752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,64,0.2739
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,128,0.2797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,256,0.2798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,512,0.2853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,1024,0.2853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,2048,0.2876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,4096,0.3108
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,8192,0.3264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,16384,0.3449
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,32768,0.3858
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,65536,0.4892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,2,0.3785
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,4,0.3785
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,131072,0.6843
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,8,0.3809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,16,0.3786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,32,0.3790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,64,0.3824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,128,0.3817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,256,0.3820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,512,0.3833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,1024,0.3902
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,2048,0.3987
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,4096,0.4227
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,16384,0.5104
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,8192,0.4509
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,32768,0.6280
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,65536,0.8601
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,2,0.5472
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,131072,1.4165
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,4,0.5458
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,8,0.5450
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,32,0.5462
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,16,0.5444
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,64,0.5486
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,128,0.5560
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,256,0.5590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,512,0.5610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,1024,0.5655
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,2048,0.5794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,4096,0.6322
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,8192,0.6886
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,16384,0.7948
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,32768,1.0309
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,2,0.8503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,4,0.8508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,65536,1.5451
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,8,0.8492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,16,0.8508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,32,0.8543
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,64,0.8552
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,128,0.8604
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,256,0.8632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,512,0.8680
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,2,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,2048,0.9109
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,4096,1.0019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,16384,1.3193
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,1024,0.8837
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,4,0.1605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,8192,1.1071
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,8,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,32768,1.7600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,16,0.1630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,32,0.1747
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,64,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,128,0.1608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,256,0.1744
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,512,0.1666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,1024,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,2048,0.1644
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,4096,0.1767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,8192,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,16384,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,32768,0.1853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,65536,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,131072,0.2244
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,2,0.1720
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,4,0.1706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,8,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,16,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,32,0.1707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,64,0.1744
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,128,0.1829
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,256,0.1821
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,512,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,1024,0.1793
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,2048,0.1769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,4096,0.1850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,8192,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,16384,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,32768,0.1973
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,65536,0.2238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,131072,0.2530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,2,0.1771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,4,0.1808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,8,0.1769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,16,0.1771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,32,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,64,0.1767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,256,0.1772
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,512,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,1024,0.1855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,2048,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,4096,0.1904
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,8192,0.2096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,128,0.1757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,16384,0.1919
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,32768,0.1993
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,65536,0.2266
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,131072,0.2653
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,2,0.1979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,4,0.1974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,8,0.1887
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,16,0.1883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,32,0.1882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,64,0.1874
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,256,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,512,0.1972
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,1024,0.1965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,2048,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,4096,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,128,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,8192,0.2183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,16384,0.2065
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,32768,0.2183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,65536,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,131072,0.2916
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,2,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,4,0.1978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,8,0.2120
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,16,0.1950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,32,0.2010
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,64,0.1988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,128,0.2061
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,512,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,1024,0.1997
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,4096,0.2141
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,256,0.1972
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,8192,0.2257
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,2048,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,16384,0.2319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,32768,0.2442
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,65536,0.2690
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,131072,0.3151
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,2,0.2102
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,4,0.2125
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,8,0.2166
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,16,0.2095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,32,0.2205
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,64,0.2149
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,128,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,256,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,512,0.2119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,2048,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,4096,0.2415
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,8192,0.2429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,16384,0.2483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,32768,0.2630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,65536,0.2973
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,131072,0.3674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,1024,0.2142
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,2,0.2403
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,4,0.2364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,8,0.2377
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,16,0.2353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,32,0.2328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,64,0.2344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,128,0.2352
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,256,0.2406
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,512,0.2348
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,2048,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,1024,0.2435
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,4096,0.2615
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,8192,0.2733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,16384,0.2770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,32768,0.3022
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,65536,0.3670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,2,0.2684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,131072,0.4707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,4,0.2730
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,8,0.2709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,16,0.2745
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,32,0.2699
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,64,0.2709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,128,0.2769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,256,0.2727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,512,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,2048,0.2879
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,1024,0.2797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,4096,0.3101
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,16384,0.3364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,8192,0.3280
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,32768,0.3817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,65536,0.4812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,2,0.3780
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,4,0.3768
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,16,0.3752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,8,0.3763
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,131072,0.6758
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,32,0.3753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,64,0.3769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,128,0.3753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,256,0.3820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,512,0.3804
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,2048,0.3957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,1024,0.3857
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,4096,0.4190
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,16384,0.5055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,8192,0.4475
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,32768,0.6222
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,65536,0.8565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,4,0.5414
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,2,0.5391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,8,0.5418
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,16,0.5398
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,32,0.5413
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,131072,1.4140
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,64,0.5456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,128,0.5518
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,256,0.5520
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,2048,0.5759
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,512,0.5531
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,4096,0.6265
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,16384,0.7930
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,1024,0.5625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,8192,0.6809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,32768,1.0269
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,65536,1.5204
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,2,0.8437
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,4,0.8425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,8,0.8432
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,16,0.8420
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,32,0.8465
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,64,0.8516
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,128,0.8517
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,256,0.8516
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,2048,0.9079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,512,0.8596
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,4096,0.9887
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,1024,0.8813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,8192,1.0977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,2,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,4,0.1973
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,8,0.1981
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,32768,1.7463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,16,0.1994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,16384,1.3154
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,32,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,64,0.1974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,128,0.2025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,256,0.1976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,512,0.1990
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,1024,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,2048,0.2034
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,8192,0.2201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,16384,0.2220
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,65536,0.2349
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,131072,0.2699
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,32768,0.2324
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,4096,0.2178
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,2,0.2162
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,4,0.2058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,8,0.2046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,16,0.2136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,32,0.2091
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,128,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,256,0.2071
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,1024,0.2070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,2048,0.2084
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,64,0.2146
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,512,0.2158
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,4096,0.2342
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,8192,0.2347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,16384,0.2223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,32768,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,65536,0.2451
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,131072,0.2725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,2,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,4,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,8,0.2152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,16,0.2202
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,32,0.2182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,64,0.2136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,128,0.2130
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,512,0.2110
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,256,0.2127
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,1024,0.2238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,2048,0.2193
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,4096,0.2384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,8192,0.2384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,16384,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,32768,0.2364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,65536,0.2569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,131072,0.2969
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,2,0.2204
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,4,0.2259
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,8,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,16,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,32,0.2201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,64,0.2227
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,128,0.2284
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,256,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,512,0.2223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,1024,0.2259
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,2048,0.2320
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,4096,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,8192,0.2564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,16384,0.2466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,32768,0.2523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,65536,0.2807
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,2,0.2302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,131072,0.3149
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,4,0.2313
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,8,0.2377
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,16,0.2414
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,32,0.2321
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,64,0.2325
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,128,0.2313
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,256,0.2410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,512,0.2375
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,1024,0.2407
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,4096,0.2627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,8192,0.2627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,16384,0.2605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,2048,0.2401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,32768,0.2667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,65536,0.3063
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,131072,0.3514
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,2,0.2493
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,4,0.2491
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,8,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,16,0.2500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,32,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,64,0.2459
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,128,0.2528
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,256,0.2500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,512,0.2493
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,1024,0.2532
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,2048,0.2560
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,4096,0.2722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,8192,0.2840
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,16384,0.2851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,32768,0.2943
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,65536,0.3359
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,2,0.2691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,131072,0.4042
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,4,0.2724
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,8,0.2682
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,16,0.2769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,32,0.2730
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,64,0.2689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,128,0.2693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,256,0.2691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,512,0.2821
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,2048,0.2851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,4096,0.3058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,8192,0.3153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,16384,0.3159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,1024,0.2771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,32768,0.3415
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,65536,0.4038
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,2,0.3437
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,131072,0.5121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,4,0.3450
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,8,0.3408
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,16,0.3424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,32,0.3445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,64,0.3408
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,128,0.3448
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,256,0.3414
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,512,0.3449
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,1024,0.3563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,2048,0.3678
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,4096,0.3900
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,8192,0.4123
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,16384,0.4242
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,32768,0.4652
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,65536,0.5645
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,2,0.4767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,4,0.4784
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,8,0.4810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,16,0.4822
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,32,0.4804
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,131072,0.7645
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,64,0.4790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,128,0.4837
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,256,0.4865
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,512,0.4977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,2048,0.5401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,4096,0.5724
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,1024,0.5143
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,16384,0.6548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,8192,0.5983
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,32768,0.7746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,65536,1.0058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,2,0.7524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,4,0.7528
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,8,0.7523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,16,0.7524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,32,0.8336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,64,0.7521
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,131072,1.5601
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,128,0.7586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,256,0.7664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,512,0.7800
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,2048,0.8554
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,4096,0.9154
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,1024,0.8029
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,8192,0.9733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,16384,1.0820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,32768,1.3170
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,2,1.3126
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,4,1.3107
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,8,1.3144
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,65536,1.8206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,16,1.3184
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,32,1.4690
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,64,1.3306
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,128,1.3389
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,256,1.3456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,512,1.3744
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,2048,1.5183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,1024,1.4482
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,4096,1.6201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,2,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,4,0.1865
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,16384,1.9476
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,8192,1.7318
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,32768,2.3845
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,8,0.1985
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,16,0.1748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,32,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,64,0.1795
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,128,0.1787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,256,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,2048,0.1768
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,512,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,4096,0.2070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,8192,0.2023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,1024,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,16384,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,32768,0.1998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,65536,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,2,0.1963
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,131072,0.2552
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,4,0.1941
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,8,0.1903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,16,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,32,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,64,0.1932
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,128,0.1839
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,256,0.1969
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,512,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,1024,0.1818
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,4096,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,8192,0.2105
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,16384,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,32768,0.2158
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,65536,0.2333
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,2048,0.1842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,131072,0.2752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,2,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,4,0.1876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,8,0.1907
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,16,0.1906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,32,0.1990
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,64,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,128,0.2021
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,256,0.1882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,512,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,1024,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,2048,0.1978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,4096,0.2135
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,8192,0.2283
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,16384,0.2080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,32768,0.2250
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,65536,0.2510
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,131072,0.2846
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,2,0.1980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,4,0.2076
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,8,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,16,0.1977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,32,0.1999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,64,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,128,0.2075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,256,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,512,0.2005
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,1024,0.2099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,2048,0.2088
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,4096,0.2181
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,8192,0.2336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,16384,0.2350
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,32768,0.2429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,65536,0.2675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,131072,0.3066
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,2,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,4,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,8,0.2128
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,16,0.2194
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,32,0.2166
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,64,0.2119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,128,0.2160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,256,0.2111
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,512,0.2207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,1024,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,2048,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,4096,0.2475
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,8192,0.2534
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,16384,0.2419
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,32768,0.2572
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,65536,0.2814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,131072,0.3314
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,2,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,4,0.2262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,8,0.2326
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,16,0.2219
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,32,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,64,0.2258
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,128,0.2274
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,256,0.2302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,512,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,1024,0.2401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,2048,0.2409
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,4096,0.2548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,16384,0.2662
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,32768,0.2804
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,65536,0.3241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,131072,0.3876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,2,0.2423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,4,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,8192,0.2630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,8,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,16,0.2502
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,32,0.2590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,64,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,128,0.2428
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,256,0.2471
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,512,0.2493
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,1024,0.2555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,2048,0.2670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,4096,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,16384,0.2944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,8192,0.2924
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,32768,0.3183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,65536,0.3776
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,131072,0.4888
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,2,0.3046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,4,0.3073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,8,0.3056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,16,0.3036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,32,0.3039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,64,0.3227
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,128,0.3034
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,256,0.3083
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,512,0.3094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,1024,0.3182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,2048,0.3302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,4096,0.3533
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,8192,0.3700
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,16384,0.3854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,32768,0.4275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,65536,0.5311
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,2,0.4124
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,131072,0.7310
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,4,0.4459
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,8,0.4124
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,16,0.4132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,32,0.4138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,64,0.4121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,128,0.4147
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,256,0.4397
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,512,0.4344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,2048,0.4680
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,1024,0.4391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,4096,0.4995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,8192,0.5293
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,16384,0.5855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,32768,0.7044
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,65536,0.9366
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,2,0.6214
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,4,0.6209
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,8,0.7031
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,131072,1.4887
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,16,0.6237
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,32,0.6284
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,64,0.6347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,128,0.6404
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,256,0.6453
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,512,0.6577
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,1024,0.6835
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,2048,0.7347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,4096,0.7931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,16384,0.9640
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,8192,0.8512
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,65536,1.7097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,2,1.0876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,32768,1.1856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,8,1.0869
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,4,1.0878
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,16,1.0911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,32,1.0961
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,64,1.1008
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,128,1.2253
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,256,1.1179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,2048,1.2872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,512,1.1411
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,4096,1.3970
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,1024,1.1908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,2,0.1670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,4,0.1648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,8,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,8192,1.5040
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,16384,1.7136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,16,0.1746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,64,0.1686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,128,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,256,0.1652
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,32768,2.1512
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,32,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,512,0.1777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,1024,0.1631
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,2048,0.1648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,4096,0.1855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,8192,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,16384,0.1796
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,32768,0.2001
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,65536,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,131072,0.2435
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,2,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,4,0.1727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,8,0.1685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,16,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,32,0.1722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,64,0.1836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,128,0.1825
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,256,0.1712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,512,0.1743
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,1024,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,2048,0.1734
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,4096,0.1838
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,8192,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,32768,0.1911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,65536,0.2258
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,16384,0.2008
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,131072,0.2477
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,2,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,4,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,8,0.1794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,16,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,32,0.1880
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,64,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,128,0.1905
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,256,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,512,0.1808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,1024,0.1826
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,2048,0.1850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,4096,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,8192,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,16384,0.2063
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,32768,0.2030
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,65536,0.2424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,131072,0.2651
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,2,0.1896
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,4,0.1887
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,8,0.1960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,16,0.1874
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,32,0.1972
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,64,0.1977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,128,0.1862
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,256,0.1911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,512,0.1900
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,1024,0.1906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,2048,0.2037
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,4096,0.2166
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,8192,0.2122
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,16384,0.2127
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,32768,0.2320
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,65536,0.2584
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,131072,0.2937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,4,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,8,0.2034
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,2,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,16,0.1956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,32,0.1974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,64,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,128,0.2033
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,256,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,512,0.1980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,1024,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,2048,0.2176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,4096,0.2236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,16384,0.2353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,8192,0.2411
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,32768,0.2436
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,65536,0.2750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,131072,0.3268
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,2,0.2058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,4,0.2057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,8,0.2137
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,16,0.2135
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,32,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,64,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,128,0.2070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,256,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,512,0.2099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,2048,0.2357
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,4096,0.2457
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,8192,0.2537
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,1024,0.2197
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,16384,0.2530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,32768,0.2644
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,65536,0.3079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,131072,0.3712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,2,0.2288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,4,0.2265
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,8,0.2235
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,16,0.2244
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,32,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,64,0.2262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,128,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,256,0.2252
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,512,0.2344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,2048,0.2473
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,1024,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,4096,0.2634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,8192,0.2759
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,16384,0.2761
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,32768,0.3021
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,65536,0.3628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,131072,0.4721
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,2,0.2772
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,4,0.2732
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,8,0.2749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,16,0.2770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,32,0.2738
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,64,0.2740
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,128,0.2762
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,256,0.2832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,512,0.2823
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,1024,0.2996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,2048,0.3036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,4096,0.3259
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,16384,0.3585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,8192,0.3428
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,32768,0.4041
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,65536,0.4994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,131072,0.6962
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,2,0.3671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,4,0.3664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,32,0.3668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,64,0.3675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,128,0.3677
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,8,0.3655
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,16,0.3670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,256,0.3950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,512,0.3794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,1024,0.3944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,2048,0.4198
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,4096,0.4520
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,8192,0.4785
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,16384,0.5353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,32768,0.6541
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,65536,0.8900
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,2,0.5243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,4,0.5253
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,8,0.5269
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,16,0.6088
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,131072,1.4431
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,32,0.5268
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,64,0.5310
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,128,0.5325
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,256,0.5386
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,512,0.5533
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,2048,0.6281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,1024,0.5779
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,4096,0.6900
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,16384,0.8564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,8192,0.7455
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,32768,1.0856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,65536,1.5888
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,2,1.0346
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,8,0.8832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,4,0.8831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,16,0.8879
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,64,0.9054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,32,0.8996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,128,0.9057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,256,0.9165
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,512,0.9484
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,2048,1.0908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,16384,1.5184
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,2,0.1443
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,32768,1.9375
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,4,0.1528
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,4096,1.1968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,1024,0.9918
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,8192,1.3065
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,8,0.1442
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,16,0.1461
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,32,0.1440
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,64,0.1526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,128,0.1459
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,256,0.1482
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,512,0.1551
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,1024,0.1463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,2048,0.1579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,4096,0.1686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,16384,0.1624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,8192,0.1645
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,32768,0.1650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,65536,0.2042
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,2,0.1492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,131072,0.2160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,4,0.1579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,16,0.1462
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,8,0.1605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,32,0.1504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,64,0.1503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,128,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,256,0.1483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,512,0.1573
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,1024,0.1482
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,2048,0.1566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,4096,0.1603
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,8192,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,16384,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,32768,0.1824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,65536,0.1924
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,131072,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,2,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,4,0.1542
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,8,0.1566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,16,0.1603
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,64,0.1650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,128,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,256,0.1566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,32,0.1666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,512,0.1628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,1024,0.1583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,2048,0.1605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,4096,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,8192,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,16384,0.1756
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,32768,0.1793
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,65536,0.2119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,131072,0.2401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,2,0.1723
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,4,0.1760
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,8,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,16,0.1764
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,32,0.1665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,64,0.1748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,128,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,256,0.1668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,512,0.1665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,1024,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,2048,0.1809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,4096,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,8192,0.1937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,16384,0.1993
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,32768,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,65536,0.2328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,131072,0.2732
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,2,0.1860
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,4,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,8,0.1779
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,16,0.1856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,32,0.1829
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,64,0.1771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,128,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,256,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,512,0.1769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,1024,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,2048,0.1945
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,4096,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,8192,0.2177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,16384,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,32768,0.2380
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,65536,0.2576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,131072,0.3066
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,2,0.1870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,4,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,8,0.1842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,16,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,32,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,64,0.1932
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,128,0.1830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,256,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,512,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,1024,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,2048,0.2111
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,4096,0.2320
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,16384,0.2395
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,8192,0.2384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,32768,0.2526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,65536,0.2924
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,2,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,4,0.1951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,131072,0.3609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,8,0.1974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,16,0.1973
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,32,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,64,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,128,0.1930
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,256,0.1976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,512,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,1024,0.2253
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,2048,0.2398
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,4096,0.2581
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,8192,0.2726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,16384,0.2706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,32768,0.3036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,65536,0.3559
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,131072,0.4664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,2,0.2014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,4,0.2054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,16,0.2091
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,32,0.2060
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,64,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,128,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,8,0.2070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,256,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,512,0.2344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,2048,0.2832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,4096,0.3085
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,1024,0.2515
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,8192,0.3257
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,16384,0.3336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,32768,0.3774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,65536,0.4820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,131072,0.6702
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,2,0.2385
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,4,0.2402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,8,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,16,0.2368
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,32,0.2381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,64,0.2408
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,128,0.2448
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,256,0.2633
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,512,0.2794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,2048,0.3809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,1024,0.3148
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,4096,0.4089
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,16384,0.4909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,8192,0.4355
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,32768,0.6094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,65536,0.8430
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,8,0.3064
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,2,0.3060
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,4,0.3041
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,16,0.3058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,32,0.3078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,64,0.3105
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,131072,1.4009
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,128,0.3144
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,256,0.3330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,512,0.3638
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,2048,0.5597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,4096,0.6122
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,1024,0.4272
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,8192,0.6677
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,16384,0.7817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,32768,1.0097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,2,0.4412
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,4,0.4410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,8,0.4408
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,65536,1.5118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,16,0.4416
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,32,0.4409
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,64,0.4421
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,128,0.4480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,256,0.4854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,512,0.5420
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,2048,0.9079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,1024,0.6660
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,16384,1.3125
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,4096,0.9964
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,8192,1.1064
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,32768,1.7464
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,2,0.1523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,4,0.1382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,8,0.1524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,16,0.1524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,32,0.1480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,64,0.1401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,128,0.1510
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,256,0.1388
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,512,0.1420
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,4096,0.1668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,8192,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,1024,0.1423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,16384,0.1675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,32768,0.1586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,65536,0.1769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,131072,0.2156
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,2048,0.1528
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,2,0.1443
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,4,0.1584
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,8,0.1491
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,16,0.1462
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,32,0.1442
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,128,0.1466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,256,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,512,0.1501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,4096,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,64,0.1548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,8192,0.1626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,16384,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,32768,0.1775
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,2048,0.1481
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,65536,0.1878
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,131072,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,1024,0.1553
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,2,0.1504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,4,0.1682
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,8,0.1549
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,16,0.1597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,32,0.1541
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,64,0.1545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,256,0.1508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,512,0.1687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,1024,0.1525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,2048,0.1623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,128,0.1504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,4096,0.1692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,8192,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,16384,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,32768,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,65536,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,131072,0.2403
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,2,0.1757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,4,0.1626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,8,0.1689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,16,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,32,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,64,0.1613
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,128,0.1720
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,256,0.1648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,512,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,1024,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,2048,0.1730
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,4096,0.1806
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,8192,0.1854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,16384,0.1911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,32768,0.2025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,65536,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,131072,0.2683
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,2,0.1740
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,4,0.1722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,8,0.1780
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,16,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,32,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,64,0.1808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,128,0.1748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,256,0.1803
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,512,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,1024,0.1742
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,2048,0.1906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,4096,0.1966
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,8192,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,16384,0.2057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,32768,0.2299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,65536,0.2579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,131072,0.3132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,2,0.1777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,4,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,8,0.1835
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,16,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,32,0.1856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,64,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,128,0.1876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,256,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,512,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,1024,0.1854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,2048,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,4096,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,8192,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,16384,0.2364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,32768,0.2563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,65536,0.2876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,131072,0.3567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,2,0.1925
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,4,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,8,0.1915
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,16,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,32,0.1903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,64,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,128,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,256,0.1853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,512,0.2025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,1024,0.2185
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,2048,0.2327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,4096,0.2500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,8192,0.2658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,16384,0.2688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,32768,0.2934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,65536,0.3496
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,131072,0.4679
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,2,0.1974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,4,0.1993
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,8,0.2020
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,16,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,32,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,64,0.1985
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,128,0.2023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,256,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,512,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,1024,0.2460
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,2048,0.2776
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,4096,0.3035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,8192,0.3167
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,16384,0.3268
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,32768,0.3777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,65536,0.4721
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,131072,0.6646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,2,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,4,0.2296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,8,0.2275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,16,0.2289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,32,0.2270
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,64,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,128,0.2317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,256,0.2510
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,512,0.2679
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,1024,0.3080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,2048,0.3670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,4096,0.3951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,16384,0.4806
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,32768,0.5959
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,8192,0.4237
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,65536,0.8296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,2,0.2830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,4,0.2820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,8,0.2837
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,16,0.2856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,32,0.2878
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,64,0.2888
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,131072,1.3847
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,128,0.2939
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,256,0.3140
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,512,0.3453
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,2048,0.5316
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,1024,0.4052
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,4096,0.5898
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,16384,0.7572
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,8192,0.6478
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,32768,0.9834
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,2,0.3984
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,65536,1.4884
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,4,0.3998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,8,0.3976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,16,0.3994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,32,0.4011
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,64,0.4004
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,128,0.4073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,256,0.4424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,2,0.1371
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,2048,0.8614
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,16384,1.2723
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,512,0.4956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,4096,0.9447
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,4,0.1369
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,8,0.1460
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,1024,0.6183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,8192,1.0570
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,16,0.1378
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,32,0.1368
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,64,0.1388
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,128,0.1519
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,256,0.1441
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,512,0.1377
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,1024,0.1524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,2048,0.1398
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,4096,0.1624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,32768,1.7035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,8192,0.1664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,16384,0.1545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,32768,0.1644
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,65536,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,2,0.1442
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,131072,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,4,0.1563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,8,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,16,0.1454
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,32,0.1440
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,128,0.1455
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,256,0.1500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,512,0.1563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,1024,0.1459
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,64,0.1566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,2048,0.1464
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,4096,0.1586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,8192,0.1751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,16384,0.1706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,32768,0.1693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,65536,0.1854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,2,0.1555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,131072,0.2079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,4,0.1666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,8,0.1545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,16,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,32,0.1586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,64,0.1561
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,128,0.1597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,256,0.1676
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,512,0.1501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,1024,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,2048,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,4096,0.1747
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,8192,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,16384,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,32768,0.1906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,65536,0.2019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,131072,0.2330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,2,0.1607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,4,0.1713
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,8,0.1629
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,16,0.1609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,32,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,64,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,128,0.1624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,256,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,512,0.1620
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,1024,0.1746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,2048,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,4096,0.1908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,8192,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,16384,0.1909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,32768,0.1923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,65536,0.2272
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,4,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,8,0.1746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,16,0.1707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,131072,0.2767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,32,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,64,0.1687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,256,0.1685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,2,0.1689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,512,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,1024,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,128,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,2048,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,4096,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,8192,0.2037
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,16384,0.2008
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,32768,0.2213
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,65536,0.2634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,131072,0.3044
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,2,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,4,0.1773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,8,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,16,0.1853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,32,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,64,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,128,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,256,0.1748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,512,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,1024,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,2048,0.2070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,4096,0.2222
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,8192,0.2278
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,16384,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,32768,0.2522
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,65536,0.2891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,131072,0.3641
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,2,0.1853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,4,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,8,0.1853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,16,0.1853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,32,0.1838
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,64,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,128,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,256,0.1870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,512,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,1024,0.2098
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,2048,0.2305
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,4096,0.2536
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,8192,0.2598
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,16384,0.2688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,32768,0.2883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,65536,0.3514
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,2,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,4,0.1956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,131072,0.4597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,8,0.1992
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,16,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,32,0.1956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,64,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,128,0.1956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,256,0.2050
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,512,0.2237
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,1024,0.2419
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,2048,0.2754
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,4096,0.2976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,16384,0.3228
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,8192,0.3118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,32768,0.3682
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,65536,0.4741
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,2,0.2238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,4,0.2218
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,131072,0.6647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,8,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,16,0.2204
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,32,0.2215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,64,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,128,0.2262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,256,0.2440
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,512,0.2637
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,1024,0.2968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,2048,0.3643
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,4096,0.3898
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,8192,0.4185
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,16384,0.4789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,32768,0.5915
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,65536,0.8235
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,2,0.2781
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,131072,1.3773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,8,0.2732
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,32,0.2795
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,16,0.2776
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,4,0.2763
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,64,0.2793
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,128,0.2855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,256,0.3068
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,512,0.3362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,2048,0.5244
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,1024,0.3998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,4096,0.5798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,8192,0.6342
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,16384,0.7464
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,32768,0.9766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,2,0.3839
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,8,0.3828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,16,0.3819
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,65536,1.5041
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,4,0.3822
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,32,0.3817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,64,0.3833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,128,0.3895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,256,0.4258
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,512,0.4792
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,2048,0.8386
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,2,0.1469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,16384,1.2490
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,4096,0.9245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,4,0.1444
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,1024,0.5976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,8192,1.0349
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,32768,1.6782
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,8,0.1469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,16,0.1359
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,32,0.1420
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,64,0.1456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,128,0.1357
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,256,0.1479
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,512,0.1359
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,1024,0.1401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,2048,0.1481
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,4096,0.1605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,8192,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,16384,0.1505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,32768,0.1659
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,65536,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,131072,0.2085
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,2,0.1431
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,4,0.1422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,16,0.1542
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,32,0.1463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,64,0.1422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,8,0.1542
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,256,0.1527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,512,0.1420
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,1024,0.1547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,2048,0.1444
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,4096,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,128,0.1421
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,8192,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,16384,0.1675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,32768,0.1626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,65536,0.1911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,131072,0.2091
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,2,0.1562
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,4,0.1607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,8,0.1524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,16,0.1521
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,32,0.1505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,64,0.1623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,128,0.1605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,256,0.1527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,512,0.1563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,1024,0.1523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,2048,0.1641
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,4096,0.1795
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,8192,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,16384,0.1766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,32768,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,65536,0.1968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,131072,0.2332
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,2,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,4,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,8,0.1713
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,16,0.1607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,32,0.1707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,64,0.1634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,128,0.1605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,256,0.1617
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,512,0.1614
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,1024,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,2048,0.1668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,4096,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,8192,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,16384,0.1889
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,32768,0.1909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,65536,0.2355
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,131072,0.2669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,2,0.1665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,4,0.1671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,8,0.1684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,16,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,32,0.1747
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,64,0.1767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,128,0.1726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,256,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,512,0.1706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,1024,0.1710
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,2048,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,4096,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,8192,0.1993
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,16384,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,32768,0.2248
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,65536,0.2568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,131072,0.2975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,2,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,4,0.1867
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,8,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,16,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,32,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,64,0.1747
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,128,0.1787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,256,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,512,0.1797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,1024,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,2048,0.1962
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,4096,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,16384,0.2359
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,8192,0.2275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,32768,0.2465
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,65536,0.2824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,2,0.1870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,131072,0.3537
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,4,0.1836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,8,0.1824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,16,0.1894
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,32,0.1819
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,64,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,128,0.1827
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,256,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,512,0.1911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,1024,0.2093
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,2048,0.2351
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,4096,0.2484
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,8192,0.2579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,16384,0.2636
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,32768,0.2910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,65536,0.3452
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,131072,0.4589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,2,0.1976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,4,0.1952
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,8,0.1939
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,32,0.1952
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,16,0.1945
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,64,0.1994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,128,0.1979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,256,0.1991
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,512,0.2201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,1024,0.2422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,2048,0.2705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,4096,0.2939
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,8192,0.3141
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,16384,0.3228
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,32768,0.3719
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,65536,0.4695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,2,0.2183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,131072,0.6616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,4,0.2158
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,8,0.2219
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,16,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,32,0.2182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,64,0.2201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,128,0.2216
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,256,0.2436
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,512,0.2580
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,1024,0.2985
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,2048,0.3605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,4096,0.3847
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,16384,0.4715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,8192,0.4108
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,32768,0.5887
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,65536,0.8195
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,2,0.2711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,4,0.2713
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,8,0.2658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,131072,1.3756
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,16,0.2705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,32,0.2750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,64,0.2752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,128,0.2813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,256,0.3018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,512,0.3296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,2048,0.5205
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,1024,0.3933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,4096,0.5730
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,16384,0.7420
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,8192,0.6327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,32768,0.9705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,65536,1.4728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,2,0.3735
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,4,0.3728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,8,0.3725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,16,0.3736
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,64,0.3769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,32,0.3752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,128,0.3840
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,256,0.4159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,2048,0.8311
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,512,0.4701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,2,0.1341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,4096,0.9123
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,16384,1.2327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,1024,0.5857
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,4,0.1339
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,8192,1.0264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,8,0.1339
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,16,0.1339
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,32768,1.6659
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,32,0.1423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,64,0.1439
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,128,0.1343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,256,0.1360
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,512,0.1340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,1024,0.1441
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,2048,0.1359
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,4096,0.1566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,32768,0.1534
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,65536,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,131072,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,2,0.1522
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,4,0.1401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,8192,0.1532
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,8,0.1402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,16384,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,16,0.1463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,64,0.1521
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,256,0.1513
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,512,0.1401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,32,0.1402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,128,0.1422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,4096,0.1605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,8192,0.1707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,2048,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,16384,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,1024,0.1413
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,32768,0.1699
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,65536,0.1772
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,131072,0.2071
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,4,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,8,0.1523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,16,0.1468
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,2,0.1583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,32,0.1480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,64,0.1484
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,128,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,256,0.1587
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,512,0.1471
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,1024,0.1525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,2048,0.1514
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,4096,0.1626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,8192,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,16384,0.1642
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,32768,0.1803
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,65536,0.1972
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,131072,0.2297
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,2,0.1626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,4,0.1626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,8,0.1584
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,16,0.1706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,32,0.1686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,64,0.1593
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,128,0.1685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,256,0.1625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,512,0.1586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,1024,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,2048,0.1661
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,4096,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,8192,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,16384,0.1780
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,65536,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,131072,0.2715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,2,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,4,0.1685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,32768,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,8,0.1775
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,16,0.1669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,32,0.1751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,64,0.1694
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,128,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,256,0.1688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,512,0.1665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,1024,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,2048,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,4096,0.1870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,8192,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,16384,0.2068
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,32768,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,65536,0.2493
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,131072,0.2964
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,2,0.1779
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,4,0.1705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,8,0.1689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,16,0.1784
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,32,0.1821
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,128,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,256,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,512,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,1024,0.1765
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,2048,0.1973
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,64,0.1731
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,4096,0.2173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,8192,0.2344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,16384,0.2243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,32768,0.2442
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,65536,0.2798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,2,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,131072,0.3491
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,4,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,16,0.1883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,32,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,64,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,8,0.1829
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,128,0.1792
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,256,0.1850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,512,0.1834
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,2048,0.2317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,1024,0.2026
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,4096,0.2431
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,8192,0.2565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,16384,0.2659
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,32768,0.2834
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,65536,0.3430
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,2,0.1951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,4,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,131072,0.4556
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,8,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,16,0.1958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,32,0.1896
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,64,0.1918
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,128,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,256,0.1962
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,512,0.2215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,1024,0.2397
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,2048,0.2714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,4096,0.2930
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,8192,0.3071
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,16384,0.3163
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,32768,0.3668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,65536,0.4653
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,2,0.2163
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,4,0.2181
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,131072,0.6595
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,8,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,16,0.2143
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,32,0.2153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,64,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,128,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,256,0.2427
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,512,0.2579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,2048,0.3604
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,1024,0.2894
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,4096,0.3815
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,16384,0.4714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,8192,0.4130
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,32768,0.5870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,65536,0.8191
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,2,0.2660
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,4,0.2661
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,8,0.2611
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,32,0.2678
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,16,0.2654
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,64,0.2686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,131072,1.3722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,128,0.2773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,256,0.2980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,512,0.3266
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,2048,0.5148
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,4096,0.5666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,16384,0.7385
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,1024,0.3871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,8192,0.6245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,32768,0.9682
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,65536,1.4679
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,2,0.3688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,4,0.3674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,8,0.3820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,16,0.3683
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,32,0.3704
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,64,0.3704
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,128,0.3759
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,256,0.4116
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,512,0.4610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,2048,0.8225
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,16384,1.2270
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,1024,0.5803
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,2,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,4096,0.9097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,4,0.2155
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,8192,1.0153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,8,0.2176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,32768,1.6606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,32,0.2192
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,16,0.2075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,64,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,128,0.2076
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,512,0.2190
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,1024,0.2110
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,4096,0.2263
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,8192,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,16384,0.2274
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,256,0.2360
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,2048,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,32768,0.2262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,65536,0.2424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,131072,0.2723
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,2,0.2258
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,8,0.2243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,4,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,16,0.2277
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,32,0.2163
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,64,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,128,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,256,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,512,0.2262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,1024,0.2300
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,2048,0.2191
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,4096,0.2338
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,8192,0.2473
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,16384,0.2316
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,32768,0.2435
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,65536,0.2660
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,131072,0.2895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,2,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,4,0.2319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,8,0.2320
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,16,0.2327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,32,0.2391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,64,0.2413
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,128,0.2396
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,256,0.2377
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,512,0.2328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,1024,0.2346
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,2048,0.2316
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,4096,0.2492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,8192,0.2625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,16384,0.2516
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,32768,0.2527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,65536,0.2798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,131072,0.3046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,2,0.2466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,4,0.2549
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,8,0.2453
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,16,0.2530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,32,0.2443
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,64,0.2472
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,128,0.2436
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,256,0.2525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,512,0.2547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,1024,0.2544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,2048,0.2466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,4096,0.2593
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,8192,0.2667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,16384,0.2597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,32768,0.2683
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,65536,0.3033
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,131072,0.3386
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,2,0.2594
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,4,0.2555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,8,0.2631
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,16,0.2560
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,32,0.2639
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,64,0.2550
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,128,0.2627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,256,0.2629
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,512,0.2590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,2048,0.2565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,1024,0.2556
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,4096,0.2792
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,8192,0.2885
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,16384,0.2809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,32768,0.2843
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,65536,0.3141
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,2,0.2849
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,131072,0.3666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,8,0.2874
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,4,0.2825
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,16,0.2896
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,32,0.2856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,64,0.2803
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,128,0.2812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,256,0.2826
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,512,0.2846
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,1024,0.2858
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,2048,0.2842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,4096,0.3094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,8192,0.3076
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,16384,0.3118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,32768,0.3221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,65536,0.3640
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,2,0.3346
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,131072,0.4341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,4,0.3359
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,8,0.3327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,16,0.3326
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,32,0.3323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,64,0.3353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,128,0.3356
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,256,0.3336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,512,0.3361
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,1024,0.3347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,2048,0.3371
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,4096,0.3538
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,8192,0.3683
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,16384,0.3700
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,32768,0.3949
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,65536,0.4578
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,2,0.4250
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,131072,0.5704
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,4,0.4261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,8,0.4250
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,16,0.4286
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,32,0.4288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,64,0.4250
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,256,0.4254
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,128,0.4246
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,512,0.4250
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,1024,0.4319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,2048,0.4333
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,4096,0.4594
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,8192,0.4728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,16384,0.4818
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,32768,0.5294
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,65536,0.6298
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,2,0.6554
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,4,0.6531
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,131072,0.8243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,8,0.6577
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,16,0.6548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,32,0.6541
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,64,0.6566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,128,0.6601
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,256,0.6687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,512,0.6672
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,1024,0.6722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,2048,0.6811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,4096,0.7022
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,8192,0.7327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,16384,0.7901
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,32768,0.9115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,65536,1.1364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,2,1.0859
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,4,1.0852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,131072,1.6860
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,8,1.0870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,16,1.0883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,32,1.0884
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,64,1.0995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,128,1.1026
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,256,1.1036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,512,1.1032
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,2048,1.1256
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,1024,1.1101
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,4096,1.1797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,16384,1.3479
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,8192,1.2319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,32768,1.5789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,2,1.8597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,4,1.8609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,8,1.8609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,65536,2.1056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,32,1.8680
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,16,1.8691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,64,1.8722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,128,1.8808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,256,1.8783
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,512,1.8895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,2048,1.9316
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,2,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,4096,2.0174
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,1024,1.9046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,8192,2.1255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,4,0.1787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,8,0.1714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,16,0.1768
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,32768,2.7741
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,16384,2.3376
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,32,0.1849
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,64,0.1766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,128,0.1699
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,256,0.1850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,512,0.1917
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,1024,0.1804
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,2048,0.1714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,4096,0.1817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,8192,0.1952
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,16384,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,32768,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,65536,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,131072,0.2456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,4,0.1801
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,8,0.1758
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,16,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,32,0.1813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,2,0.1980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,64,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,256,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,512,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,1024,0.1930
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,128,0.1880
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,2048,0.1784
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,4096,0.2014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,8192,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,32768,0.2022
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,65536,0.2332
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,131072,0.2660
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,2,0.1930
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,4,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,16384,0.2091
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,8,0.1957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,16,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,32,0.2014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,64,0.1929
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,128,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,256,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,512,0.1927
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,1024,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,2048,0.1993
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,4096,0.2165
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,8192,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,16384,0.2149
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,32768,0.2193
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,65536,0.2470
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,131072,0.2692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,2,0.2086
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,4,0.1976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,8,0.2031
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,16,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,32,0.2073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,64,0.1998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,128,0.1986
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,256,0.2076
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,512,0.2083
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,1024,0.1983
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,2048,0.2091
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,4096,0.2207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,8192,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,16384,0.2190
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,32768,0.2341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,65536,0.2513
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,131072,0.2974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,2,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,8,0.2196
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,4,0.2121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,16,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,32,0.2129
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,64,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,128,0.2144
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,256,0.2205
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,512,0.2244
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,1024,0.2168
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,2048,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,4096,0.2387
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,8192,0.2396
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,16384,0.2352
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,32768,0.2521
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,65536,0.2848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,131072,0.3271
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,2,0.2344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,4,0.2304
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,8,0.2269
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,16,0.2312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,64,0.2343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,128,0.2382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,32,0.2295
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,256,0.2303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,512,0.2326
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,1024,0.2357
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,2048,0.2322
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,4096,0.2537
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,16384,0.2634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,8192,0.2602
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,32768,0.2714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,65536,0.3114
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,131072,0.3833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,2,0.2615
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,4,0.2589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,8,0.2632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,16,0.2630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,32,0.2607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,64,0.2592
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,128,0.2570
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,256,0.2630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,512,0.2627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,1024,0.2609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,2048,0.2632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,4096,0.2842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,8192,0.2925
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,16384,0.2961
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,32768,0.3245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,65536,0.3827
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,131072,0.4935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,2,0.3074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,4,0.3060
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,8,0.3078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,16,0.3105
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,32,0.3080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,64,0.3117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,128,0.3075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,256,0.3073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,512,0.3108
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,1024,0.3136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,2048,0.3138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,4096,0.3391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,8192,0.3512
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,16384,0.3694
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,32768,0.4095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,65536,0.5095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,2,0.4292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,4,0.4314
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,8,0.4328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,131072,0.7048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,16,0.4321
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,32,0.4321
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,64,0.4312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,128,0.4318
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,256,0.4352
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,512,0.4368
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,1024,0.4381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,2048,0.4462
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,4096,0.4724
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,8192,0.5011
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,16384,0.5575
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,32768,0.6759
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,2,0.6585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,65536,0.9112
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,4,0.6589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,8,0.6564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,16,0.6581
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,32,0.6631
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,131072,1.4661
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,64,0.6694
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,128,0.6754
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,256,0.6746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,512,0.6766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,2048,0.6983
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,1024,0.6856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,16384,0.9170
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,4096,0.7489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,8192,0.8036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,32768,1.1471
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,2,1.0886
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,8,1.0897
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,4,1.0878
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,65536,1.6531
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,16,1.0924
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,32,1.0938
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,64,1.0963
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,128,1.1010
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,256,1.1030
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,512,1.1080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,1024,1.1228
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,2048,1.1525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,2,0.1689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,4096,1.2392
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,4,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,16384,1.5603
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,8192,1.3466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,32768,1.9887
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,8,0.1706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,32,0.1727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,16,0.1700
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,64,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,128,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,256,0.1748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,512,0.1706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,1024,0.1645
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,2048,0.1651
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,4096,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,8192,0.1807
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,16384,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,32768,0.1941
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,65536,0.2126
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,131072,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,2,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,4,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,8,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,16,0.1742
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,32,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,64,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,128,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,256,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,512,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,1024,0.1841
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,2048,0.1739
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,4096,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,8192,0.1927
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,16384,0.1858
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,32768,0.1989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,65536,0.2145
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,131072,0.2536
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,2,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,4,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,8,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,16,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,32,0.1803
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,64,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,128,0.1845
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,256,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,512,0.1923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,1024,0.1809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,2048,0.1830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,4096,0.2019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,8192,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,16384,0.2061
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,32768,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,65536,0.2337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,131072,0.2663
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,2,0.1895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,4,0.1997
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,8,0.1916
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,16,0.1988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,32,0.1979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,64,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,128,0.1894
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,256,0.1915
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,512,0.1923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,1024,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,2048,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,4096,0.2142
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,8192,0.2151
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,16384,0.2090
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,32768,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,65536,0.2486
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,131072,0.2872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,2,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,4,0.2017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,8,0.2002
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,16,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,32,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,64,0.2083
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,128,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,256,0.2033
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,512,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,1024,0.2025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,2048,0.2098
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,4096,0.2299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,8192,0.2407
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,16384,0.2301
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,32768,0.2483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,65536,0.2739
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,131072,0.3200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,2,0.2181
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,4,0.2275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,8,0.2183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,16,0.2208
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,32,0.2201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,64,0.2183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,128,0.2182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,256,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,512,0.2296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,1024,0.2280
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,2048,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,4096,0.2428
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,8192,0.2514
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,16384,0.2524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,32768,0.2664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,65536,0.3130
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,131072,0.3790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,2,0.2480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,4,0.2471
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,8,0.2486
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,16,0.2469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,32,0.2469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,64,0.2490
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,128,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,256,0.2526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,512,0.2485
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,2048,0.2543
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,1024,0.2564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,4096,0.2755
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,8192,0.2835
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,16384,0.2882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,32768,0.3130
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,65536,0.3741
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,131072,0.4871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,2,0.2909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,4,0.2941
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,8,0.2943
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,16,0.2915
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,32,0.2913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,64,0.2912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,256,0.2964
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,128,0.2910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,1024,0.3018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,2048,0.3049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,4096,0.3221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,512,0.2978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,8192,0.3385
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,16384,0.3526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,32768,0.3999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,65536,0.5007
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,131072,0.6916
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,2,0.4050
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,8,0.4047
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,4,0.4043
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,16,0.4045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,32,0.4058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,64,0.4091
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,128,0.4083
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,256,0.4075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,512,0.4103
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,2048,0.4212
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,1024,0.4158
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,16384,0.5346
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,4096,0.4486
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,8192,0.4762
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,32768,0.6513
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,65536,0.8830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,2,0.6040
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,4,0.6043
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,8,0.6052
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,16,0.6059
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,32,0.6074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,131072,1.4411
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,64,0.6106
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,128,0.6166
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,256,0.6174
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,512,0.6200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,2048,0.6422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,1024,0.6272
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,4096,0.6919
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,16384,0.8594
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,8192,0.7481
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,32768,1.0891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,2,0.9943
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,65536,1.5912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,4,0.9944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,8,0.9950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,16,0.9985
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,32,1.0045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,64,1.0080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,128,1.0065
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,256,1.0117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,512,1.0165
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,2048,1.0575
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,2,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,4096,1.1457
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,1024,1.0333
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,4,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,8192,1.2582
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,8,0.1588
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,16,0.1689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,16384,1.4664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,32,0.1604
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,64,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,256,0.1688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,128,0.1700
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,512,0.1587
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,1024,0.1583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,2048,0.1623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,4096,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,8192,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,32768,1.8927
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,16384,0.1815
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,32768,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,65536,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,2,0.1650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,131072,0.2252
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,4,0.1650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,8,0.1689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,16,0.1786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,64,0.1779
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,128,0.1748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,256,0.1649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,512,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,32,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,1024,0.1675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,2048,0.1777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,4096,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,8192,0.1951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,16384,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,32768,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,65536,0.2177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,131072,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,2,0.1740
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,4,0.1753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,8,0.1824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,16,0.1773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,32,0.1731
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,64,0.1732
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,128,0.1837
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,256,0.1827
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,1024,0.1828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,2048,0.1766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,4096,0.1921
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,512,0.1772
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,8192,0.1917
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,16384,0.1897
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,32768,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,65536,0.2293
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,131072,0.2673
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,2,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,4,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,8,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,32,0.1854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,64,0.1850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,16,0.1911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,128,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,256,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,512,0.1917
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,1024,0.1841
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,2048,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,4096,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,8192,0.2050
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,16384,0.2144
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,32768,0.2229
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,65536,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,131072,0.2851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,2,0.1951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,4,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,8,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,16,0.1979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,32,0.2065
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,64,0.1961
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,128,0.1965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,256,0.1956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,512,0.2050
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,1024,0.1959
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,2048,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,4096,0.2177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,8192,0.2295
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,16384,0.2224
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,32768,0.2384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,65536,0.2666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,131072,0.3203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,2,0.2103
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,4,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,8,0.2158
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,16,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,32,0.2099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,64,0.2096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,128,0.2126
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,256,0.2185
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,512,0.2132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,1024,0.2173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,2048,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,4096,0.2382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,16384,0.2529
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,8192,0.2452
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,32768,0.2616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,65536,0.3031
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,2,0.2327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,131072,0.3696
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,4,0.2361
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,8,0.2380
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,16,0.2379
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,32,0.2353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,64,0.2344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,128,0.2399
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,256,0.2355
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,512,0.2404
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,1024,0.2422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,2048,0.2468
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,4096,0.2674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,8192,0.2757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,16384,0.2790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,32768,0.3055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,65536,0.3675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,2,0.2763
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,4,0.2773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,8,0.2746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,131072,0.4754
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,16,0.2754
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,32,0.2764
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,64,0.2749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,256,0.2834
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,128,0.2810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,512,0.2825
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,1024,0.2880
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,2048,0.2893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,4096,0.3101
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,8192,0.3254
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,16384,0.3442
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,32768,0.3856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,65536,0.4879
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,2,0.3810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,4,0.3814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,131072,0.6824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,16,0.3850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,8,0.3834
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,64,0.3846
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,32,0.3834
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,128,0.3879
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,256,0.3865
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,512,0.3875
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,1024,0.3926
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,2048,0.4030
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,4096,0.4279
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,16384,0.5133
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,8192,0.4526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,32768,0.6337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,65536,0.8648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,4,0.5606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,2,0.5568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,8,0.5599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,131072,1.4178
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,16,0.5584
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,32,0.5604
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,64,0.5655
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,128,0.5716
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,256,0.5716
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,512,0.5716
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,2048,0.5948
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,1024,0.5825
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,4096,0.6457
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,32768,1.0434
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,8192,0.7006
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,16384,0.8154
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,65536,1.5445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,2,0.8968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,4,0.8955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,16,0.8984
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,8,0.8962
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,32,0.9038
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,64,0.9049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,128,0.9078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,256,0.9084
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,512,0.9188
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,2,0.1568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,2048,0.9593
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,4,0.1566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,4096,1.0467
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,1024,0.9317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,8,0.1581
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,8192,1.1531
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,16384,1.3748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,16,0.1564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,32,0.1655
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,64,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,128,0.1586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,256,0.1607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,512,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,32768,1.8023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,1024,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,2048,0.1566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,4096,0.1692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,8192,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,16384,0.1731
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,65536,0.1937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,32768,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,131072,0.2189
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,2,0.1765
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,4,0.1634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,8,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,16,0.1727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,32,0.1643
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,64,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,128,0.1688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,512,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,256,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,1024,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,2048,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,4096,0.1884
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,8192,0.1810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,16384,0.1782
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,32768,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,65536,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,131072,0.2295
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,2,0.1830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,4,0.1820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,8,0.1688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,16,0.1688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,32,0.1766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,64,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,128,0.1669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,256,0.1689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,512,0.1830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,1024,0.1778
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,2048,0.1711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,4096,0.1869
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,8192,0.1993
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,16384,0.1926
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,32768,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,65536,0.2146
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,131072,0.2539
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,2,0.1904
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,4,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,8,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,16,0.1830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,32,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,64,0.1930
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,128,0.1810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,256,0.1801
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,512,0.1896
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,2048,0.1932
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,4096,0.1957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,8192,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,16384,0.2060
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,32768,0.2064
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,65536,0.2396
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,1024,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,131072,0.2858
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,2,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,4,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,8,0.1901
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,16,0.1897
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,32,0.1939
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,64,0.1922
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,128,0.2049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,256,0.2019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,512,0.1977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,1024,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,2048,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,4096,0.2114
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,8192,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,16384,0.2196
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,32768,0.2333
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,65536,0.2701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,131072,0.3097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,2,0.2085
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,4,0.2068
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,8,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,16,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,32,0.2043
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,64,0.2079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,128,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,256,0.2136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,512,0.2101
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,1024,0.2099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,2048,0.2239
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,4096,0.2326
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,8192,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,16384,0.2408
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,32768,0.2617
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,65536,0.2968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,131072,0.3735
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,2,0.2300
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,4,0.2303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,8,0.2319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,16,0.2316
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,32,0.2303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,64,0.2359
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,128,0.2344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,256,0.2303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,512,0.2324
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,2048,0.2410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,1024,0.2363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,4096,0.2603
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,8192,0.2754
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,16384,0.2782
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,32768,0.3033
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,65536,0.3577
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,131072,0.4728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,2,0.2661
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,4,0.2670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,8,0.2708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,16,0.2655
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,32,0.2691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,64,0.2656
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,128,0.2715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,256,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,512,0.2753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,1024,0.2772
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,4096,0.3030
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,8192,0.3227
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,16384,0.3326
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,2048,0.2856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,32768,0.3814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,65536,0.4796
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,2,0.3693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,4,0.3708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,16,0.3722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,8,0.3676
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,131072,0.6742
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,32,0.3741
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,64,0.3736
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,128,0.3723
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,256,0.3749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,512,0.3789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,2048,0.3864
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,1024,0.3812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,4096,0.4172
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,8192,0.4431
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,16384,0.5025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,32768,0.6178
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,65536,0.8562
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,4,0.5357
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,2,0.5385
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,8,0.5357
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,32,0.5395
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,16,0.5362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,64,0.5431
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,131072,1.4070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,128,0.5466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,256,0.5489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,512,0.5509
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,1024,0.5574
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,2048,0.5736
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,4096,0.6238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,8192,0.6783
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,16384,0.7921
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,32768,1.0242
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,2,0.8485
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,4,0.8471
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,65536,1.5279
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,8,0.8475
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,16,0.8504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,32,0.8505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,64,0.8568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,128,0.8560
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,256,0.8606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,2048,0.9099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,512,0.8676
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,4096,0.9979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,2,0.1524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,16384,1.3177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,1024,0.8828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,8192,1.1029
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,4,0.1523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,8,0.1608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,32768,1.7489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,16,0.1530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,32,0.1525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,64,0.1566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,128,0.1617
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,256,0.1603
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,512,0.1607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,1024,0.1604
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,2048,0.1524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,8192,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,16384,0.1754
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,4096,0.1659
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,65536,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,131072,0.2136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,2,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,4,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,8,0.1678
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,32768,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,16,0.1762
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,32,0.1624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,64,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,128,0.1660
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,256,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,512,0.1689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,1024,0.1747
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,2048,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,4096,0.1752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,8192,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,16384,0.1792
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,32768,0.1937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,65536,0.2070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,131072,0.2391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,2,0.1675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,4,0.1680
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,8,0.1675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,16,0.1768
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,32,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,64,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,128,0.1786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,256,0.1682
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,512,0.1689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,1024,0.1686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,2048,0.1828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,4096,0.1917
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,8192,0.1942
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,32768,0.1894
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,65536,0.2197
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,131072,0.2516
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,2,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,16384,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,4,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,8,0.1890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,16,0.1792
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,32,0.1878
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,64,0.1828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,128,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,256,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,512,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,2048,0.1808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,1024,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,4096,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,8192,0.1974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,16384,0.1989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,32768,0.2145
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,65536,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,131072,0.2753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,2,0.1889
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,4,0.1974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,8,0.1870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,16,0.1944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,32,0.2022
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,128,0.1981
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,256,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,512,0.1896
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,64,0.1926
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,1024,0.1890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,2048,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,4096,0.2183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,8192,0.2183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,16384,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,32768,0.2287
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,65536,0.2594
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,131072,0.3223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,4,0.2014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,2,0.2038
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,8,0.2111
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,16,0.2058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,32,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,64,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,128,0.2003
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,256,0.2101
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,512,0.2025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,1024,0.2133
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,2048,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,4096,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,8192,0.2401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,16384,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,32768,0.2551
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,65536,0.3039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,131072,0.3617
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,2,0.2315
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,4,0.2278
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,8,0.2309
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,16,0.2266
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,32,0.2242
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,64,0.2334
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,128,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,256,0.2285
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,512,0.2330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,2048,0.2391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,1024,0.2391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,4096,0.2627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,8192,0.2685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,16384,0.2736
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,32768,0.3003
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,65536,0.3583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,2,0.2648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,4,0.2652
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,131072,0.4729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,16,0.2641
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,32,0.2608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,64,0.2630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,128,0.2631
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,8,0.2610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,256,0.2693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,512,0.2693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,1024,0.2747
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,2048,0.2797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,4096,0.3039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,8192,0.3167
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,16384,0.3344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,32768,0.3765
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,65536,0.4786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,131072,0.6715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,2,0.3634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,4,0.3650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,8,0.3664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,32,0.3669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,16,0.3640
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,64,0.3662
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,128,0.3709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,256,0.3697
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,512,0.3714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,1024,0.3752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,2048,0.3852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,4096,0.4100
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,16384,0.4987
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,8192,0.4370
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,32768,0.6173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,65536,0.8498
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,2,0.5264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,8,0.5275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,16,0.5264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,32,0.5269
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,64,0.5326
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,131072,1.4021
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,4,0.5270
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,128,0.5373
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,256,0.5391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,512,0.5417
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,2048,0.5619
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,1024,0.5469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,4096,0.6121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,16384,0.7810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,8192,0.6684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,32768,1.0109
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,2,0.8262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,4,0.8240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,65536,1.5175
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,16,0.8269
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,8,0.8270
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,32,0.8302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,64,0.8319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,128,0.8373
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,256,0.8394
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,2048,0.8931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,512,0.8438
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,16384,1.2998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,4096,0.9724
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,2,0.1584
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,8192,1.0872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,4,0.1503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,8,0.1505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,1024,0.8634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,16,0.1630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,32768,1.7349
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,32,0.1631
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,64,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,128,0.1503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,256,0.1504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,512,0.1535
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,1024,0.1588
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,2048,0.1636
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,4096,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,8192,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,16384,0.1643
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,32768,0.1787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,65536,0.1853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,131072,0.2198
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,2,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,4,0.1730
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,8,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,16,0.1705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,32,0.1607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,64,0.1603
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,128,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,256,0.1696
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,512,0.1605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,1024,0.1707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,2048,0.1628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,4096,0.1828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,8192,0.1768
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,16384,0.1830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,32768,0.1786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,65536,0.2090
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,131072,0.2251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,2,0.1732
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,4,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,8,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,16,0.1688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,64,0.1787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,128,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,256,0.1693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,32,0.1687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,512,0.1695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,1024,0.1771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,2048,0.1689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,4096,0.1911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,8192,0.1976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,16384,0.1843
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,32768,0.1896
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,65536,0.2158
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,2,0.1769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,131072,0.2523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,4,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,8,0.1877
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,16,0.1907
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,32,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,64,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,128,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,256,0.1798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,512,0.1871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,1024,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,4096,0.1992
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,8192,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,16384,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,2048,0.1779
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,32768,0.2023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,65536,0.2452
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,131072,0.2836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,4,0.1865
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,8,0.1964
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,16,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,2,0.1936
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,32,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,64,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,128,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,256,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,512,0.1992
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,1024,0.1968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,2048,0.1949
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,4096,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,8192,0.2133
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,16384,0.2121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,32768,0.2360
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,131072,0.3175
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,2,0.2023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,4,0.1997
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,65536,0.2657
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,8,0.2080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,16,0.2004
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,32,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,64,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,256,0.2053
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,512,0.1999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,1024,0.2012
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,2048,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,128,0.2062
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,4096,0.2306
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,8192,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,16384,0.2364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,32768,0.2509
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,65536,0.2999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,131072,0.3599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,2,0.2252
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,4,0.2228
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,16,0.2284
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,8,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,32,0.2299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,64,0.2255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,128,0.2309
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,256,0.2259
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,512,0.2298
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,1024,0.2303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,2048,0.2402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,4096,0.2547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,8192,0.2661
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,16384,0.2775
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,32768,0.3014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,65536,0.3562
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,131072,0.4652
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,8,0.2629
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,16,0.2626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,32,0.2600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,4,0.2592
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,64,0.2650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,128,0.2596
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,256,0.2604
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,512,0.2710
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,2,0.2603
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,1024,0.2773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,2048,0.2770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,4096,0.2989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,16384,0.3319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,8192,0.3131
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,32768,0.3728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,65536,0.4779
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,2,0.3605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,4,0.3595
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,131072,0.6675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,8,0.3599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,16,0.3601
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,32,0.3635
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,64,0.3650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,128,0.3639
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,256,0.3651
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,512,0.3680
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,2048,0.3798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,1024,0.3727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,4096,0.4059
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,8192,0.4365
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,16384,0.4937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,32768,0.6124
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,65536,0.8475
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,2,0.5185
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,4,0.5223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,8,0.5206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,131072,1.3965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,16,0.5202
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,32,0.5225
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,128,0.5294
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,64,0.5244
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,256,0.5327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,512,0.5329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,2048,0.5576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,1024,0.5435
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,4096,0.6092
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,16384,0.7765
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,8192,0.6616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,32768,1.0006
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,2,0.8124
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,4,0.8148
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,8,0.8129
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,65536,1.5079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,32,0.8182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,64,0.8205
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,16,0.8161
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,128,0.8222
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,256,0.8271
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,512,0.8329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,2048,0.8779
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,16384,1.2941
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,1024,0.8515
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,4096,0.9704
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,2,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,8192,1.0765
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,4,0.1503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,8,0.1514
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,16,0.1545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,32768,1.7145
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,32,0.1643
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,64,0.1484
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,128,0.1576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,256,0.1524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,512,0.1573
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,1024,0.1524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,2048,0.1543
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,8192,0.1698
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,4096,0.1766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,16384,0.1623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,32768,0.1726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,65536,0.1972
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,2,0.1615
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,131072,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,4,0.1586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,8,0.1584
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,16,0.1690
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,32,0.1582
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,256,0.1612
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,64,0.1643
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,512,0.1687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,1024,0.1599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,2048,0.1587
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,4096,0.1828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,128,0.1706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,8192,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,16384,0.1712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,32768,0.1771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,65536,0.2044
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,131072,0.2342
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,2,0.1668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,4,0.1706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,8,0.1672
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,16,0.1675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,32,0.1767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,64,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,128,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,256,0.1645
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,512,0.1669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,1024,0.1699
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,2048,0.1668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,4096,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,8192,0.1967
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,16384,0.1859
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,32768,0.1848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,65536,0.2092
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,131072,0.2569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,2,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,4,0.1798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,8,0.1850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,16,0.1771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,32,0.1834
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,64,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,128,0.1740
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,256,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,512,0.1828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,1024,0.1768
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,2048,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,4096,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,8192,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,16384,0.1923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,32768,0.2030
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,65536,0.2328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,131072,0.2732
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,2,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,4,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,8,0.1980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,16,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,32,0.1829
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,64,0.1833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,128,0.1866
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,256,0.1871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,512,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,1024,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,2048,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,4096,0.2084
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,8192,0.2058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,16384,0.2156
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,32768,0.2271
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,65536,0.2584
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,131072,0.3145
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,4,0.2019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,8,0.2047
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,2,0.1963
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,16,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,32,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,64,0.1999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,128,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,256,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,512,0.2065
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,1024,0.1980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,2048,0.2046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,4096,0.2289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,8192,0.2429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,16384,0.2334
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,32768,0.2526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,65536,0.2919
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,131072,0.3639
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,2,0.2214
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,8,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,4,0.2183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,16,0.2215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,32,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,128,0.2220
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,256,0.2278
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,64,0.2249
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,512,0.2339
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,2048,0.2316
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,1024,0.2295
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,4096,0.2501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,8192,0.2634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,16384,0.2689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,32768,0.2958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,65536,0.3604
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,131072,0.4639
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,2,0.2570
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,4,0.2590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,8,0.2569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,16,0.2578
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,32,0.2609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,64,0.2603
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,128,0.2549
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,256,0.2587
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,512,0.2643
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,1024,0.2743
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,2048,0.2776
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,4096,0.2964
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,16384,0.3238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,8192,0.3096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,32768,0.3746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,65536,0.4698
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,131072,0.6701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,2,0.3597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,4,0.3562
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,8,0.3579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,16,0.3560
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,32,0.3564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,64,0.3606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,128,0.3606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,256,0.3628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,512,0.3671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,1024,0.3697
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,2048,0.3778
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,16384,0.4899
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,4096,0.4028
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,8192,0.4295
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,32768,0.6104
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,65536,0.8407
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,2,0.5154
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,4,0.5154
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,131072,1.3951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,16,0.5181
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,8,0.5130
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,32,0.5176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,64,0.5202
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,128,0.5256
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,256,0.5270
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,512,0.5309
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,1024,0.5399
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,2048,0.5538
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,4096,0.6009
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,16384,0.7686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,8192,0.6597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,32768,0.9993
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,2,0.8061
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,4,0.8083
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,8,0.8093
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,65536,1.5005
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,16,0.8088
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,64,0.8168
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,128,0.8173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,32,0.8123
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,256,0.8188
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,512,0.8307
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,2048,0.8701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,16384,1.2807
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,4096,0.9661
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,1024,0.8389
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,8192,1.0685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,2,0.2575
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,32768,1.7115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,4,0.2640
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,8,0.2624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,32,0.2557
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,16,0.2527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,64,0.2650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,128,0.2611
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,256,0.2491
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,2048,0.2658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,1024,0.2674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,8192,0.2749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,4096,0.2665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,32768,0.2828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,16384,0.3156
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,512,0.2523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,65536,0.2883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,2,0.2710
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,131072,0.3285
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,8,0.2612
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,4,0.2630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,16,0.2667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,32,0.2746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,64,0.2638
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,512,0.2722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,256,0.2649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,2048,0.2644
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,1024,0.2647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,128,0.2708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,4096,0.2956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,8192,0.2954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,16384,0.2828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,65536,0.3133
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,131072,0.3312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,2,0.2699
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,32768,0.2854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,4,0.2789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,8,0.2743
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,16,0.2696
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,32,0.2675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,128,0.2797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,64,0.2685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,256,0.2709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,512,0.2796
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,1024,0.2698
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,2048,0.2797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,4096,0.2914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,8192,0.2911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,16384,0.2970
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,65536,0.3144
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,32768,0.2918
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,131072,0.3560
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,2,0.2791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,4,0.2851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,8,0.2832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,16,0.2786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,32,0.2859
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,64,0.2781
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,128,0.2885
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,1024,0.2873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,2048,0.2850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,512,0.2816
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,256,0.2813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,4096,0.2999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,8192,0.3147
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,16384,0.2995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,32768,0.3167
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,65536,0.3364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,131072,0.3731
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,2,0.2981
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,4,0.2996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,8,0.2988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,16,0.2982
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,32,0.3060
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,64,0.3066
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,128,0.3039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,256,0.2995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,512,0.3001
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,2048,0.3057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,4096,0.3217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,1024,0.3036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,16384,0.3327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,8192,0.3362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,32768,0.3377
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,65536,0.3630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,131072,0.4105
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,2,0.3054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,4,0.3033
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,8,0.3064
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,32,0.3112
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,64,0.3096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,16,0.3092
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,128,0.3043
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,256,0.3041
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,1024,0.3083
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,512,0.3061
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,2048,0.3167
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,16384,0.3411
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,8192,0.3419
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,4096,0.3358
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,32768,0.3515
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,65536,0.3888
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,131072,0.4588
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,2,0.3301
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,4,0.3320
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,32,0.3385
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,16,0.3437
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,8,0.3366
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,64,0.3320
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,128,0.3324
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,256,0.3377
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,512,0.3330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,4096,0.3623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,2048,0.3450
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,8192,0.3758
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,1024,0.3410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,16384,0.3751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,32768,0.3991
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,65536,0.4601
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,131072,0.5701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,4,0.4097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,8,0.4104
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,2,0.4114
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,16,0.4271
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,32,0.4058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,64,0.4067
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,128,0.4053
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,256,0.4093
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,512,0.4131
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,2048,0.4335
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,1024,0.4237
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,4096,0.4591
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,8192,0.4692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,16384,0.4842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,32768,0.5275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,65536,0.6283
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,2,0.5520
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,8,0.5854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,131072,0.8282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,16,0.5522
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,32,0.5492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,4,0.5509
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,64,0.5536
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,128,0.5608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,256,0.5827
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,512,0.5702
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,2048,0.6075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,4096,0.6400
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,8192,0.6663
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,1024,0.5808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,16384,0.7234
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,32768,0.8450
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,2,0.8538
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,4,0.8533
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,65536,1.0804
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,8,0.8533
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,16,0.8530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,131072,1.6376
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,32,0.8546
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,64,0.8539
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,256,0.8657
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,128,0.8584
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,512,0.8771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,1024,0.9035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,2048,0.9519
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,4096,1.0142
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,8192,1.0667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,16384,1.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,2,1.4748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,32768,1.4115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,8,1.4786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,4,1.4785
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,65536,1.9180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,16,1.4810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,32,1.4784
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,64,1.4847
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,128,1.4924
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,256,1.5049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,512,1.5308
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,2048,1.6740
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,4096,1.7789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,1024,1.6047
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,2,0.2093
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,16384,2.1073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,4,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,8,0.2231
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,8192,1.8885
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,16,0.2218
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,32,0.2083
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,64,0.2100
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,32768,2.5416
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,128,0.2329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,256,0.2058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,512,0.2194
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,2048,0.2250
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,1024,0.2102
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,4096,0.2265
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,8192,0.2350
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,16384,0.2215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,32768,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,131072,0.2890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,2,0.2190
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,4,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,65536,0.2430
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,8,0.2193
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,16,0.2245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,64,0.2201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,32,0.2556
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,128,0.2292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,256,0.2196
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,512,0.2284
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,1024,0.2211
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,2048,0.2301
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,4096,0.2393
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,8192,0.2425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,16384,0.2493
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,32768,0.2539
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,65536,0.2572
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,131072,0.2988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,2,0.2259
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,4,0.2288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,8,0.2267
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,16,0.2341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,32,0.2250
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,128,0.2341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,64,0.2220
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,256,0.2310
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,512,0.2343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,2048,0.2302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,4096,0.2483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,16384,0.2431
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,8192,0.2474
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,32768,0.2565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,1024,0.2294
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,65536,0.2760
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,131072,0.3106
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,2,0.2312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,4,0.2326
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,16,0.2332
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,32,0.2393
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,8,0.2334
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,128,0.2382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,256,0.2421
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,64,0.2354
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,512,0.2327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,1024,0.2332
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,2048,0.2443
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,4096,0.2529
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,8192,0.2630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,16384,0.2599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,32768,0.2697
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,65536,0.2825
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,131072,0.3287
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,4,0.2554
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,2,0.2498
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,8,0.2499
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,16,0.2537
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,32,0.2511
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,64,0.2571
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,128,0.2489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,256,0.2548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,512,0.2533
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,1024,0.2534
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,2048,0.2590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,8192,0.2876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,16384,0.2732
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,32768,0.2865
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,4096,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,65536,0.3207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,131072,0.3673
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,2,0.2523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,4,0.2524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,8,0.2606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,16,0.2525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,32,0.2530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,64,0.2572
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,128,0.2555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,256,0.2523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,512,0.2542
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,1024,0.2644
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,2048,0.2627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,4096,0.2855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,8192,0.2930
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,16384,0.2854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,32768,0.3001
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,65536,0.3481
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,2,0.2727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,131072,0.4110
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,4,0.2737
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,8,0.2770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,16,0.2769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,32,0.2799
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,64,0.2728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,128,0.2732
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,256,0.2736
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,512,0.2783
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,1024,0.2792
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,2048,0.2897
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,4096,0.3084
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,8192,0.3205
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,16384,0.3195
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,32768,0.3424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,65536,0.4031
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,2,0.3280
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,131072,0.5136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,4,0.3311
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,8,0.3311
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,16,0.3308
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,32,0.3268
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,64,0.3266
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,128,0.3271
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,512,0.3359
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,256,0.3300
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,1024,0.3380
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,2048,0.3547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,4096,0.3791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,8192,0.3882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,16384,0.4028
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,32768,0.4490
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,65536,0.5523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,2,0.4404
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,4,0.4433
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,131072,0.7421
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,8,0.4410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,16,0.4401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,64,0.4391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,32,0.4391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,128,0.4694
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,256,0.4471
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,512,0.4508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,2048,0.4916
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,1024,0.4649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,16384,0.6059
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,4096,0.5206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,8192,0.5508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,32768,0.7275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,65536,0.9568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,2,0.6623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,4,0.6632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,8,0.6644
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,16,0.6662
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,32,0.6667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,131072,1.5165
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,64,0.6744
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,128,0.6797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,256,0.6813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,512,0.6953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,2048,0.7700
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,1024,0.7227
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,4096,0.8281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,8192,0.8846
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,16384,0.9942
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,32768,1.2292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,4,1.1521
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,2,1.1498
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,8,1.1528
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,16,1.1507
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,32,1.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,65536,1.7224
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,64,1.1614
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,128,1.1675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,256,1.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,512,1.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,2048,1.3451
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,1024,1.2513
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,4096,1.4493
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,2,0.1860
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,16384,1.7721
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,4,0.1824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,8192,1.5575
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,32768,2.2106
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,8,0.1855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,16,0.1820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,32,0.1837
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,64,0.1826
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,128,0.1974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,256,0.1883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,512,0.1874
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,2048,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,1024,0.1964
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,4096,0.2018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,8192,0.2041
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,16384,0.2008
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,32768,0.2196
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,65536,0.2237
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,2,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,131072,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,4,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,8,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,16,0.2243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,32,0.1980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,64,0.2218
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,128,0.1948
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,256,0.2075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,512,0.1940
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,1024,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,2048,0.2026
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,8192,0.2296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,4096,0.2130
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,16384,0.2093
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,32768,0.2263
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,65536,0.2485
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,131072,0.2591
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,2,0.2003
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,4,0.2063
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,8,0.2009
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,32,0.2122
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,16,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,128,0.2080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,64,0.2020
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,256,0.2101
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,512,0.2009
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,2048,0.2039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,4096,0.2258
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,1024,0.2150
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,8192,0.2255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,16384,0.2254
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,65536,0.2526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,32768,0.2265
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,131072,0.2757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,2,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,8,0.2093
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,16,0.2140
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,32,0.2163
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,64,0.2167
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,4,0.2186
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,128,0.2100
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,256,0.2099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,512,0.2107
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,1024,0.2111
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,2048,0.2210
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,4096,0.2391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,8192,0.2412
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,16384,0.2285
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,32768,0.2377
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,65536,0.2718
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,131072,0.2998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,2,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,4,0.2315
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,8,0.2262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,16,0.2301
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,32,0.2339
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,64,0.2283
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,128,0.2243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,256,0.2271
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,512,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,1024,0.2266
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,4096,0.2504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,2048,0.2424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,8192,0.2600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,32768,0.2665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,16384,0.2506
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,131072,0.3440
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,65536,0.2989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,2,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,8,0.2248
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,4,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,16,0.2310
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,32,0.2293
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,64,0.2311
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,128,0.2248
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,256,0.2253
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,512,0.2317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,2048,0.2380
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,4096,0.2513
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,8192,0.2589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,1024,0.2338
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,16384,0.2641
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,32768,0.2728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,65536,0.3142
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,131072,0.3793
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,2,0.2419
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,4,0.2353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,8,0.2385
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,16,0.2465
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,32,0.2417
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,128,0.2370
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,64,0.2383
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,256,0.2423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,512,0.2391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,2048,0.2511
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,4096,0.2733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,1024,0.2428
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,8192,0.2820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,16384,0.2812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,32768,0.3146
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,65536,0.3690
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,2,0.2882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,131072,0.4746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,4,0.2843
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,8,0.3018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,16,0.2878
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,32,0.2819
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,64,0.2876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,128,0.3010
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,256,0.2856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,512,0.2886
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,1024,0.2942
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,8192,0.3449
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,2048,0.3107
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,4096,0.3285
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,16384,0.3611
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,32768,0.4081
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,65536,0.5049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,2,0.3757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,131072,0.6991
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,4,0.3740
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,8,0.3772
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,16,0.3768
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,32,0.3744
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,64,0.3768
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,128,0.3748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,256,0.3790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,512,0.3846
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,2048,0.4230
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,4096,0.4540
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,1024,0.3989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,8192,0.4793
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,16384,0.5421
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,32768,0.6582
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,65536,0.8908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,131072,1.4502
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,2,0.5349
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,4,0.5329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,8,0.5362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,16,0.5328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,32,0.5361
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,64,0.6201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,128,0.5393
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,256,0.5983
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,512,0.5604
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,2048,0.6316
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,4096,0.6933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,1024,0.6046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,8192,0.7488
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,16384,0.8595
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,32768,1.0897
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,65536,1.5942
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,2,0.9000
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,4,0.9017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,8,0.9046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,16,0.9094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,32,0.9185
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,64,0.9198
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,128,0.9217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,256,0.9372
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,512,1.0209
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,2048,1.1026
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,4096,1.2054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,1024,1.0332
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,16384,1.5283
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,8192,1.3178
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,32768,1.9564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,2,0.2733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,4,0.2656
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,8,0.2638
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,16,0.2655
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,64,0.2737
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,32,0.2858
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,128,0.2626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,512,0.2735
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,256,0.2828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,1024,0.2615
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,2048,0.2633
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,4096,0.2832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,8192,0.2965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,16384,0.2749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,32768,0.2803
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,65536,0.3120
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,131072,0.3228
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,2,0.3173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,4,0.2755
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,8,0.2790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,16,0.2834
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,32,0.2747
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,64,0.2775
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,256,0.2854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,128,0.2751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,512,0.3182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,2048,0.2797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,1024,0.2753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,4096,0.3019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,8192,0.2944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,16384,0.2984
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,32768,0.2935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,65536,0.3145
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,131072,0.3504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,2,0.2948
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,4,0.2931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,8,0.2960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,16,0.2908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,32,0.2974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,64,0.2895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,128,0.2883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,256,0.2980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,512,0.2947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,1024,0.2902
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,2048,0.2971
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,8192,0.3182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,16384,0.3055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,4096,0.3055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,32768,0.3090
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,65536,0.3392
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,131072,0.3673
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,2,0.3021
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,4,0.3110
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,8,0.3138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,16,0.3016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,32,0.3026
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,64,0.3026
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,128,0.3121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,256,0.3061
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,512,0.3043
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,1024,0.3116
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,2048,0.3125
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,4096,0.3173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,8192,0.3220
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,16384,0.3188
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,32768,0.3363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,2,0.3286
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,131072,0.3909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,65536,0.3475
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,4,0.3326
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,8,0.3232
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,16,0.3234
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,32,0.3245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,64,0.3324
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,256,0.3323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,512,0.3267
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,128,0.3239
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,1024,0.3337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,2048,0.3242
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,4096,0.3398
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,32768,0.3590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,65536,0.3900
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,131072,0.4340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,16384,0.3426
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,2,0.3463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,8192,0.3464
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,8,0.3397
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,4,0.3400
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,16,0.3460
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,32,0.3431
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,128,0.3470
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,64,0.3416
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,256,0.3466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,512,0.3404
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,1024,0.3413
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,2048,0.3416
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,4096,0.3651
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,8192,0.3680
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,16384,0.3694
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,32768,0.3796
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,65536,0.4256
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,131072,0.4871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,2,0.3950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,8,0.3995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,16,0.3939
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,32,0.3994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,4,0.3959
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,64,0.3951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,128,0.4000
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,256,0.3956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,512,0.3929
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,1024,0.3950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,2048,0.4005
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,4096,0.4155
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,8192,0.4290
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,16384,0.4299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,32768,0.4607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,65536,0.5138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,131072,0.6227
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,2,0.4899
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,4,0.4944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,16,0.4947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,32,0.4902
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,8,0.4910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,64,0.4957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,128,0.4901
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,256,0.4899
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,512,0.4890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,2048,0.4995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,4096,0.5232
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,16384,0.5478
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,1024,0.4900
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,8192,0.5369
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,32768,0.5957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,65536,0.6906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,131072,0.8945
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,4,0.7252
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,16,0.7266
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,32,0.7269
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,2,0.7251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,8,0.7264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,64,0.7330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,128,0.7306
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,256,0.7352
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,2048,0.7505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,1024,0.7394
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,16384,0.8598
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,512,0.7390
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,8192,0.8005
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,4096,0.7717
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,32768,0.9749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,65536,1.2091
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,2,1.1877
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,4,1.1855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,8,1.1855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,32,1.1886
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,16,1.1879
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,131072,1.7600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,64,1.1923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,128,1.1999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,256,1.1991
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,512,1.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,2048,1.2231
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,1024,1.2084
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,4096,1.2713
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,8192,1.3286
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,16384,1.4386
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,32768,1.6782
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,65536,2.1879
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,2,2.0274
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,8,2.0252
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,4,2.0208
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,32,2.0308
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,16,2.0302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,64,2.0339
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,128,2.0371
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,256,2.0351
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,512,2.0479
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,2048,2.0846
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,1024,2.0581
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,2,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,16384,2.5002
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,4096,2.1745
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,4,0.2132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,8192,2.2762
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,8,0.2031
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,16,0.2162
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,32768,2.9336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,32,0.2034
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,64,0.2125
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,128,0.2027
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,256,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,512,0.2062
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,2048,0.2173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,1024,0.2059
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,4096,0.2303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,8192,0.2234
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,16384,0.2171
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,65536,0.2488
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,32768,0.2558
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,2,0.2245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,131072,0.2719
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,4,0.2171
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,8,0.2165
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,16,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,32,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,64,0.2251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,128,0.2258
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,512,0.2255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,256,0.2152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,1024,0.2170
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,2048,0.2184
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,4096,0.2425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,8192,0.2347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,16384,0.2342
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,65536,0.2649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,32768,0.2328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,131072,0.2902
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,2,0.2244
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,4,0.2237
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,8,0.2357
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,16,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,32,0.2275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,64,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,128,0.2337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,256,0.2299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,512,0.2251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,1024,0.2362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,2048,0.2233
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,4096,0.2391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,8192,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,16384,0.2357
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,32768,0.2547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,65536,0.2709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,2,0.2303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,131072,0.3081
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,4,0.2285
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,8,0.2309
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,16,0.2348
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,32,0.2300
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,64,0.2386
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,128,0.2377
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,1024,0.2326
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,256,0.2408
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,512,0.2292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,4096,0.2500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,2048,0.2313
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,8192,0.2519
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,65536,0.2850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,16384,0.2559
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,32768,0.2534
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,131072,0.3220
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,2,0.2511
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,4,0.2513
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,8,0.2544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,16,0.2512
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,32,0.2593
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,64,0.2490
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,128,0.2589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,256,0.2583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,512,0.2526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,1024,0.2501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,2048,0.2574
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,4096,0.2688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,8192,0.2734
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,16384,0.2779
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,32768,0.2853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,65536,0.3107
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,2,0.2625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,131072,0.3575
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,4,0.2600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,8,0.2583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,32,0.2636
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,16,0.2569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,128,0.2632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,64,0.2572
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,256,0.2565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,512,0.2624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,1024,0.2607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,2048,0.2593
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,4096,0.2826
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,8192,0.2811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,16384,0.2812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,65536,0.3356
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,32768,0.3039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,131072,0.4104
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,2,0.2879
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,4,0.2865
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,8,0.2919
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,16,0.2862
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,32,0.2855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,64,0.2850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,128,0.2906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,256,0.2880
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,512,0.2927
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,1024,0.2937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,2048,0.2892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,4096,0.3070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,8192,0.3170
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,16384,0.3199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,32768,0.3517
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,65536,0.4112
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,131072,0.5161
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,2,0.3312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,4,0.3296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,8,0.3309
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,16,0.3336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,32,0.3316
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,64,0.3341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,128,0.3348
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,256,0.3306
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,1024,0.3322
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,512,0.3327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,2048,0.3363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,4096,0.3596
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,16384,0.3902
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,8192,0.3712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,32768,0.4364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,65536,0.5315
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,2,0.4593
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,4,0.4571
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,131072,0.7240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,8,0.4617
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,16,0.4626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,32,0.4599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,64,0.4592
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,256,0.4601
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,128,0.4597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,512,0.4600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,2048,0.4721
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,16384,0.5801
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,4096,0.4952
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,1024,0.4662
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,8192,0.5247
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,32768,0.6990
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,65536,0.9314
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,131072,1.4898
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,2,0.6998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,8,0.7013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,4,0.6978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,32,0.7045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,16,0.6986
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,64,0.7091
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,128,0.7123
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,256,0.7126
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,512,0.7147
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,2048,0.7364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,4096,0.7850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,1024,0.7221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,8192,0.8368
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,16384,0.9476
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,32768,1.1794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,65536,1.6795
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,2,1.1548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,8,1.1525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,4,1.1555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,16,1.1562
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,32,1.1568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,64,1.1580
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,128,1.1602
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,256,1.1646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,512,1.1717
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,2048,1.2109
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,4096,1.2930
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,2,0.1815
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,1024,1.1842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,16384,1.6206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,8192,1.4020
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,4,0.1901
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,16,0.1908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,8,0.2002
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,32,0.1817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,64,0.1958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,32768,2.0505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,128,0.1809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,256,0.1904
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,1024,0.1939
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,512,0.1801
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,2048,0.1878
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,4096,0.1984
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,8192,0.2162
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,16384,0.2058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,32768,0.2088
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,65536,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,131072,0.2434
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,2,0.2050
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,4,0.1947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,8,0.1997
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,16,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,32,0.2067
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,128,0.1938
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,64,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,256,0.1998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,512,0.1930
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,1024,0.2052
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,2048,0.2058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,4096,0.2226
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,8192,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,16384,0.2079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,32768,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,65536,0.2332
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,2,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,131072,0.2721
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,4,0.2057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,8,0.2022
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,16,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,64,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,32,0.2017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,128,0.2148
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,256,0.2028
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,512,0.2143
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,1024,0.2030
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,8192,0.2284
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,2048,0.2044
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,4096,0.2187
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,32768,0.2368
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,16384,0.2258
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,65536,0.2440
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,131072,0.2859
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,2,0.2090
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,8,0.2107
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,4,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,16,0.2126
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,32,0.2215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,64,0.2096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,256,0.2192
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,128,0.2173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,512,0.2119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,1024,0.2125
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,2048,0.2114
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,4096,0.2368
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,8192,0.2356
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,16384,0.2277
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,32768,0.2347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,65536,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,131072,0.3048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,2,0.2364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,4,0.2315
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,8,0.2394
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,16,0.2304
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,32,0.2312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,64,0.2328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,128,0.2302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,256,0.2382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,512,0.2401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,1024,0.2324
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,2048,0.2378
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,4096,0.2466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,8192,0.2531
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,16384,0.2485
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,32768,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,65536,0.2922
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,131072,0.3468
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,2,0.2351
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,4,0.2343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,8,0.2341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,16,0.2399
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,32,0.2341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,64,0.2401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,128,0.2387
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,256,0.2424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,512,0.2351
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,1024,0.2351
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,2048,0.2350
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,4096,0.2519
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,8192,0.2661
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,16384,0.2658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,32768,0.2817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,65536,0.3170
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,131072,0.3808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,2,0.2575
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,4,0.2570
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,8,0.2587
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,16,0.2651
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,32,0.2638
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,64,0.2634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,128,0.2606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,256,0.2592
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,512,0.2604
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,2048,0.2687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,4096,0.2777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,8192,0.2941
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,1024,0.2621
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,16384,0.2965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,32768,0.3207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,65536,0.3770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,131072,0.4882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,2,0.3055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,4,0.3028
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,8,0.3057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,16,0.3070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,32,0.2999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,64,0.3000
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,128,0.3022
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,256,0.3012
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,512,0.3073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,1024,0.3042
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,2048,0.3102
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,4096,0.3303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,8192,0.3405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,16384,0.3562
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,32768,0.4024
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,65536,0.5072
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,4,0.4158
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,2,0.4141
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,8,0.4176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,16,0.4143
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,131072,0.6979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,32,0.4151
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,64,0.4169
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,128,0.4188
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,256,0.4161
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,2048,0.4286
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,1024,0.4202
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,4096,0.4503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,8192,0.4813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,16384,0.5346
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,32768,0.6566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,512,0.4150
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,65536,0.8919
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,2,0.6149
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,4,0.6120
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,8,0.6141
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,16,0.6144
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,32,0.6186
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,64,0.6224
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,131072,1.4457
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,128,0.6240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,256,0.6254
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,512,0.6281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,2048,0.6468
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,1024,0.6355
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,16384,0.8649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,8192,0.7524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,4096,0.6958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,32768,1.0956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,4,1.0171
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,2,1.0122
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,8,1.0167
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,16,1.0188
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,32,1.0201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,65536,1.5980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,64,1.0273
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,128,1.0222
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,256,1.0273
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,512,1.0341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,2048,1.0728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,4096,1.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,2,0.1711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,1024,1.0467
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,8192,1.2604
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,4,0.1862
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,8,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,16,0.1704
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,16384,1.4803
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,32768,1.9045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,32,0.1798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,128,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,64,0.1844
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,256,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,512,0.1713
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,1024,0.1901
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,2048,0.1794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,4096,0.1860
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,65536,0.2064
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,8192,0.2052
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,32768,0.1909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,131072,0.2364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,16384,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,2,0.2005
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,4,0.1928
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,8,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,64,0.1818
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,16,0.1818
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,32,0.1979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,128,0.1835
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,256,0.1868
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,512,0.1950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,1024,0.1820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,2048,0.1822
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,4096,0.2085
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,8192,0.2025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,16384,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,32768,0.2037
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,65536,0.2330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,131072,0.2453
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,2,0.1870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,4,0.1846
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,8,0.1924
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,16,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,32,0.1942
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,64,0.1875
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,256,0.1866
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,512,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,1024,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,2048,0.1948
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,4096,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,16384,0.2096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,8192,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,128,0.2008
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,32768,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,65536,0.2263
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,131072,0.2587
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,2,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,4,0.1988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,8,0.1956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,16,0.2054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,32,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,64,0.2060
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,128,0.2085
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,256,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,512,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,1024,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,2048,0.1950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,4096,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,8192,0.2254
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,16384,0.2211
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,32768,0.2217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,65536,0.2527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,131072,0.2823
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,2,0.2224
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,4,0.2148
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,8,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,16,0.2198
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,32,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,64,0.2256
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,128,0.2253
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,256,0.2155
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,512,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,2048,0.2163
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,4096,0.2310
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,8192,0.2481
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,16384,0.2398
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,1024,0.2166
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,32768,0.2516
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,65536,0.2809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,131072,0.3234
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,2,0.2172
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,4,0.2177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,8,0.2262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,16,0.2207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,32,0.2273
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,64,0.2169
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,128,0.2177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,256,0.2251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,512,0.2269
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,1024,0.2176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,2048,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,4096,0.2395
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,16384,0.2492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,8192,0.2426
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,32768,0.2677
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,65536,0.2989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,131072,0.3657
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,2,0.2447
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,4,0.2409
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,8,0.2452
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,16,0.2397
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,32,0.2448
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,64,0.2426
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,128,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,256,0.2412
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,512,0.2475
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,2048,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,1024,0.2511
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,4096,0.2625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,8192,0.2757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,16384,0.2791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,32768,0.3030
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,65536,0.3625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,131072,0.4736
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,2,0.2817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,4,0.2770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,8,0.2765
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,16,0.2812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,32,0.2769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,64,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,128,0.2780
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,256,0.2825
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,512,0.2796
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,1024,0.2839
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,2048,0.2819
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,4096,0.3099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,8192,0.3206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,32768,0.3828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,65536,0.4783
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,16384,0.3340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,131072,0.6753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,2,0.3797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,4,0.3818
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,8,0.3838
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,16,0.3805
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,32,0.3833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,64,0.3815
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,128,0.3811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,256,0.3827
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,512,0.3824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,1024,0.3882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,2048,0.3960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,4096,0.4205
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,16384,0.5013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,8192,0.4438
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,32768,0.6199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,2,0.5542
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,8,0.5528
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,65536,0.8547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,16,0.5536
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,32,0.5554
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,4,0.5553
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,131072,1.4059
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,64,0.5581
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,128,0.5657
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,256,0.5621
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,512,0.5639
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,2048,0.5837
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,1024,0.5717
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,4096,0.6367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,16384,0.8024
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,8192,0.6874
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,65536,1.5387
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,2,0.8822
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,32768,1.0383
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,4,0.8813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,8,0.8846
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,16,0.8846
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,32,0.8869
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,64,0.8867
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,128,0.8891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,256,0.8905
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,512,0.9006
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,2048,0.9362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,1024,0.9120
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,4096,1.0257
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,16384,1.3525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,2,0.1624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,4,0.1727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,8,0.1609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,8192,1.1257
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,16,0.1707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,32,0.1596
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,64,0.1704
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,32768,1.7846
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,128,0.1685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,256,0.1687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,512,0.1603
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,1024,0.1699
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,2048,0.1590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,4096,0.1810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,8192,0.1889
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,16384,0.1837
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,32768,0.1920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,65536,0.1967
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,131072,0.2370
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,4,0.1723
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,2,0.1752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,16,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,32,0.1840
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,8,0.1813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,64,0.1806
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,128,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,256,0.1855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,512,0.1722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,1024,0.1767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,4096,0.2024
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,8192,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,16384,0.1897
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,32768,0.2045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,65536,0.2229
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,131072,0.2402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,2048,0.1739
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,2,0.1785
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,16,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,4,0.1838
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,8,0.1922
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,32,0.1784
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,64,0.1809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,128,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,256,0.1808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,1024,0.1801
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,512,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,2048,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,4096,0.1973
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,8192,0.2067
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,16384,0.2020
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,32768,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,65536,0.2225
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,131072,0.2666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,2,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,4,0.1967
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,16,0.1970
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,8,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,32,0.1864
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,64,0.1890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,128,0.1927
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,256,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,512,0.1865
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,1024,0.1985
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,2048,0.1898
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,4096,0.2039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,8192,0.2202
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,16384,0.2166
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,32768,0.2147
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,65536,0.2399
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,131072,0.2818
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,2,0.2075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,8,0.2164
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,4,0.2059
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,16,0.2070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,32,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,64,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,128,0.2121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,512,0.2201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,1024,0.2085
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,2048,0.2089
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,4096,0.2251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,256,0.2158
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,8192,0.2398
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,16384,0.2289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,32768,0.2480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,65536,0.2674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,2,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,4,0.2098
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,16,0.2099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,131072,0.3163
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,8,0.2189
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,32,0.2173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,128,0.2166
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,64,0.2095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,256,0.2096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,512,0.2137
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,1024,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,2048,0.2132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,4096,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,8192,0.2437
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,16384,0.2355
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,32768,0.2512
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,65536,0.2979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,131072,0.3707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,2,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,4,0.2339
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,8,0.2311
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,16,0.2375
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,32,0.2316
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,64,0.2313
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,128,0.2361
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,256,0.2391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,512,0.2338
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,1024,0.2350
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,2048,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,4096,0.2525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,8192,0.2685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,16384,0.2689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,32768,0.3005
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,65536,0.3543
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,2,0.2648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,131072,0.4688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,4,0.2655
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,8,0.2652
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,16,0.2658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,64,0.2688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,32,0.2700
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,128,0.2646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,256,0.2657
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,512,0.2728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,2048,0.2722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,4096,0.2967
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,1024,0.2712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,8192,0.3103
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,16384,0.3262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,32768,0.3677
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,65536,0.4683
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,2,0.3646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,4,0.3617
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,131072,0.6686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,8,0.3609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,16,0.3632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,32,0.3657
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,64,0.3646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,128,0.3635
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,256,0.3653
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,512,0.3677
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,2048,0.3755
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,1024,0.3715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,4096,0.4012
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,8192,0.4257
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,16384,0.4851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,32768,0.6027
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,65536,0.8386
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,4,0.5244
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,2,0.5245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,8,0.5257
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,16,0.5233
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,32,0.5250
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,64,0.5277
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,131072,1.3960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,128,0.5323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,256,0.5353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,2048,0.5568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,512,0.5341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,4096,0.6034
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,16384,0.7690
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,1024,0.5414
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,8192,0.6583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,65536,1.5063
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,2,0.8206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,32768,1.0039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,4,0.8189
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,8,0.8192
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,16,0.8207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,32,0.8219
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,64,0.8258
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,128,0.8235
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,256,0.8288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,512,0.8366
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,2048,0.8713
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,16384,1.2831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,4096,0.9594
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,2,0.1622
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,1024,0.8488
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,4,0.1638
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,8192,1.0670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,8,0.1586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,16,0.1583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,32,0.1618
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,64,0.1499
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,32768,1.7152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,128,0.1588
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,256,0.1607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,512,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,1024,0.1668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,2048,0.1564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,4096,0.1733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,8192,0.1656
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,16384,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,32768,0.1807
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,65536,0.1974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,131072,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,2,0.1618
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,4,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,8,0.1675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,16,0.1630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,32,0.1678
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,64,0.1726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,128,0.1731
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,256,0.1589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,512,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,1024,0.1595
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,2048,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,4096,0.1907
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,8192,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,16384,0.1835
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,32768,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,65536,0.2021
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,131072,0.2466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,2,0.1740
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,4,0.1771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,8,0.1751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,16,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,32,0.1856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,64,0.1670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,128,0.1733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,256,0.1816
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,512,0.1767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,1024,0.1846
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,2048,0.1748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,4096,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,8192,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,16384,0.2007
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,32768,0.2098
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,65536,0.2238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,131072,0.2533
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,2,0.1898
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,4,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,8,0.1810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,16,0.1915
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,32,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,64,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,128,0.1818
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,256,0.1877
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,512,0.1825
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,1024,0.1848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,2048,0.1941
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,4096,0.2099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,8192,0.2040
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,16384,0.2020
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,32768,0.2183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,65536,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,131072,0.2714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,2,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,4,0.2058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,8,0.2099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,16,0.2026
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,32,0.2022
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,64,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,128,0.2104
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,256,0.2099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,512,0.2017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,1024,0.2048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,2048,0.2155
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,4096,0.2270
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,8192,0.2288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,16384,0.2347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,32768,0.2373
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,65536,0.2742
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,131072,0.3170
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,2,0.2122
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,4,0.2024
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,8,0.2119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,16,0.2043
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,64,0.2038
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,128,0.2131
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,32,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,256,0.2064
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,512,0.2121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,2048,0.2101
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,1024,0.2090
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,4096,0.2249
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,8192,0.2408
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,16384,0.2395
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,32768,0.2568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,65536,0.2883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,131072,0.3572
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,2,0.2273
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,8,0.2286
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,16,0.2324
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,4,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,32,0.2341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,64,0.2274
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,128,0.2335
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,256,0.2290
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,512,0.2292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,1024,0.2357
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,2048,0.2337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,4096,0.2555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,8192,0.2598
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,16384,0.2647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,32768,0.2957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,65536,0.3527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,131072,0.4577
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,2,0.2623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,4,0.2652
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,8,0.2585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,16,0.2595
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,32,0.2598
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,64,0.2643
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,128,0.2642
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,256,0.2623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,512,0.2617
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,2048,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,1024,0.2690
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,4096,0.2881
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,8192,0.3034
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,16384,0.3222
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,32768,0.3673
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,65536,0.4629
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,2,0.3578
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,4,0.3531
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,131072,0.6569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,8,0.3530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,16,0.3522
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,32,0.3572
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,64,0.3548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,128,0.3582
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,256,0.3599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,512,0.3578
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,1024,0.3604
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,2048,0.3674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,4096,0.3906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,16384,0.4805
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,8192,0.4180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,32768,0.5972
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,65536,0.8388
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,4,0.5061
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,2,0.5055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,8,0.5045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,16,0.5073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,131072,1.3926
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,32,0.5097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,64,0.5139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,128,0.5186
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,256,0.5177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,512,0.5178
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,2048,0.5383
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,4096,0.5889
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,1024,0.5261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,16384,0.7556
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,8192,0.6432
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,32768,0.9895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,65536,1.4942
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,4,0.7908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,8,0.7899
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,2,0.7886
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,16,0.7908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,32,0.7911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,64,0.7969
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,128,0.7960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,256,0.7970
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,512,0.8045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,2048,0.8456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,2,0.1442
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,16384,1.2557
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,4096,0.9307
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,4,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,1024,0.8185
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,8192,1.0329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,8,0.1545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,16,0.1496
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,32,0.1564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,64,0.1561
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,128,0.1537
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,256,0.1503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,512,0.1500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,1024,0.1610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,32768,1.6871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,2048,0.1549
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,4096,0.1693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,8192,0.1669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,16384,0.1586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,32768,0.1669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,65536,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,131072,0.2216
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,2,0.1625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,4,0.1650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,8,0.1505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,16,0.1571
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,32,0.1600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,64,0.1522
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,128,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,256,0.1685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,512,0.1503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,1024,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,2048,0.1583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,4096,0.1725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,8192,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,32768,0.1929
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,16384,0.1702
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,65536,0.2105
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,131072,0.2242
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,2,0.1640
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,4,0.1666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,8,0.1628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,16,0.1776
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,32,0.1767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,64,0.1612
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,128,0.1722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,256,0.1638
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,512,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,1024,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,2048,0.1651
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,4096,0.1898
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,8192,0.1820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,16384,0.1838
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,32768,0.1895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,65536,0.2104
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,131072,0.2529
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,2,0.1868
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,4,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,8,0.1861
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,16,0.1762
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,32,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,64,0.1795
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,128,0.1863
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,256,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,512,0.1856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,2048,0.1801
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,1024,0.1780
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,4096,0.2034
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,8192,0.1999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,16384,0.2081
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,32768,0.2062
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,65536,0.2397
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,131072,0.2685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,2,0.1987
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,4,0.1963
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,8,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,16,0.2043
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,32,0.2045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,64,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,128,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,256,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,512,0.1973
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,1024,0.2098
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,4096,0.2160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,2048,0.2075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,8192,0.2319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,16384,0.2207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,32768,0.2315
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,65536,0.2701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,131072,0.3127
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,2,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,4,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,8,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,16,0.1988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,32,0.1992
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,64,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,128,0.1993
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,256,0.2086
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,512,0.2107
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,1024,0.2047
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,2048,0.2132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,4096,0.2226
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,8192,0.2305
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,16384,0.2299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,32768,0.2493
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,65536,0.2924
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,131072,0.3556
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,2,0.2301
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,4,0.2273
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,8,0.2229
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,16,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,32,0.2232
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,64,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,128,0.2238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,256,0.2299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,512,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,1024,0.2341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,2048,0.2278
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,4096,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,8192,0.2570
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,16384,0.2621
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,32768,0.2959
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,65536,0.3543
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,131072,0.4578
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,2,0.2543
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,4,0.2586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,8,0.2542
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,16,0.2551
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,32,0.2598
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,64,0.2536
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,128,0.2615
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,256,0.2579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,512,0.2604
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,1024,0.2618
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,2048,0.2695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,4096,0.2844
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,8192,0.3063
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,16384,0.3157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,32768,0.3652
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,65536,0.4632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,2,0.3479
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,131072,0.6557
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,4,0.3519
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,8,0.3513
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,16,0.3485
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,32,0.3492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,64,0.3526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,128,0.3495
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,256,0.3518
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,512,0.3548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,2048,0.3666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,1024,0.3546
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,4096,0.3875
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,16384,0.4755
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,8192,0.4168
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,32768,0.5986
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,65536,0.8255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,2,0.5012
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,4,0.4994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,16,0.4994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,32,0.4994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,8,0.4984
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,64,0.5029
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,128,0.5096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,131072,1.3827
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,256,0.5104
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,512,0.5138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,2048,0.5326
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,16384,0.7471
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,4096,0.5789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,1024,0.5175
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,8192,0.6353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,32768,0.9836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,2,0.7750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,4,0.7753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,8,0.7720
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,16,0.7735
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,32,0.7753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,65536,1.4982
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,64,0.7794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,128,0.7771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,256,0.7836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,512,0.7879
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,16384,1.2425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,2048,0.8333
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,2,0.1422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,32768,1.6714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,4,0.1483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,8,0.1611
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,1024,0.8045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,16,0.1625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,8192,1.0215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,32,0.1418
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,64,0.1475
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,4096,0.9145
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,128,0.1603
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,256,0.1600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,512,0.1405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,1024,0.1483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,4096,0.1648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,8192,0.1757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,32768,0.1778
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,16384,0.1668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,2048,0.1405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,65536,0.1854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,131072,0.2060
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,2,0.1693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,4,0.1682
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,16,0.1549
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,32,0.1504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,8,0.1645
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,128,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,64,0.1576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,256,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,512,0.1614
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,1024,0.1586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,2048,0.1685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,4096,0.1609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,8192,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,16384,0.1815
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,32768,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,65536,0.1951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,131072,0.2327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,2,0.1564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,4,0.1660
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,8,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,16,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,32,0.1661
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,64,0.1704
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,128,0.1571
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,256,0.1613
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,512,0.1727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,1024,0.1592
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,2048,0.1662
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,4096,0.1719
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,8192,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,32768,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,16384,0.1880
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,65536,0.2108
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,131072,0.2539
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,2,0.1800
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,4,0.1716
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,8,0.1687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,16,0.1695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,32,0.1766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,64,0.1838
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,128,0.1718
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,256,0.1802
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,512,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,1024,0.1690
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,2048,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,4096,0.1978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,8192,0.1937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,16384,0.1984
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,32768,0.2038
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,65536,0.2356
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,131072,0.2649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,2,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,4,0.1915
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,8,0.2039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,16,0.1923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,32,0.2030
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,64,0.1923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,128,0.1988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,256,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,512,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,1024,0.2047
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,2048,0.2068
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,4096,0.2251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,8192,0.2226
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,16384,0.2184
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,32768,0.2295
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,65536,0.2714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,131072,0.3100
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,2,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,4,0.2047
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,8,0.2024
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,16,0.1920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,32,0.1951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,64,0.2048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,128,0.1976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,256,0.1969
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,512,0.2039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,1024,0.2048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,2048,0.2029
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,4096,0.2250
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,8192,0.2269
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,16384,0.2361
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,32768,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,65536,0.2796
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,131072,0.3583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,2,0.2177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,4,0.2266
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,8,0.2219
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,16,0.2218
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,32,0.2271
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,64,0.2191
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,128,0.2284
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,256,0.2238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,512,0.2292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,1024,0.2236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,2048,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,4096,0.2514
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,8192,0.2539
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,16384,0.2654
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,32768,0.2934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,65536,0.3470
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,2,0.2523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,131072,0.4547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,4,0.2563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,8,0.2505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,32,0.2565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,16,0.2501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,64,0.2568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,128,0.2511
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,256,0.2556
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,512,0.2568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,2048,0.2624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,1024,0.2638
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,4096,0.2868
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,16384,0.3126
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,32768,0.3580
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,65536,0.4585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,8192,0.3028
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,131072,0.6579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,4,0.3472
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,2,0.3491
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,8,0.3455
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,16,0.3453
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,32,0.3473
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,64,0.3469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,128,0.3501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,256,0.3505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,512,0.3517
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,2048,0.3638
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,1024,0.3520
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,4096,0.3836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,8192,0.4127
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,16384,0.4707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,32768,0.5939
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,2,0.4942
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,4,0.4963
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,65536,0.8255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,8,0.4943
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,131072,1.3825
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,16,0.4949
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,32,0.4939
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,64,0.5018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,128,0.5052
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,256,0.5048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,512,0.5074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,2048,0.5293
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,1024,0.5138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,4096,0.5776
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,16384,0.7464
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,8192,0.6288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,32768,0.9799
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,2,0.7634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,65536,1.4808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,4,0.7658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,16,0.7679
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,8,0.7626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,32,0.7709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,64,0.7703
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,128,0.7717
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,256,0.7727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,2048,0.8217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,512,0.7837
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,16384,1.2393
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,4096,0.9131
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,1024,0.7973
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,8192,1.0157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,32768,1.6706
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,2,0.2454
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,64,0.2565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,4,0.2432
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,8,0.2529
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,512,0.2466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,32,0.2609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,16,0.2390
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,128,0.2467
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,1024,0.2576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,2048,0.2438
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,4096,0.2670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,8192,0.2793
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,16384,0.2701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,32768,0.2578
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,65536,0.2958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,131072,0.3112
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,256,0.2414
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,2,0.3210
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,8,0.3407
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,32,0.3279
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,16,0.3472
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,64,0.3450
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,512,0.3223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,128,0.3276
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,4,0.3254
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,1024,0.3415
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,2048,0.3538
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,4096,0.3480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,8192,0.3704
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,16384,0.3401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,32768,0.3470
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,65536,0.3810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,131072,0.4157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,2,0.2613
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,4,0.2752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,8,0.2572
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,16,0.2607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,256,0.3216
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,32,0.2814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,64,0.2750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,128,0.2599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,256,0.2619
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,512,0.2755
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,1024,0.2599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,4096,0.2810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,2048,0.2834
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,8192,0.2903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,16384,0.2955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,32768,0.2820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,65536,0.3199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,131072,0.3366
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,2,0.2722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,4,0.2888
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,8,0.2769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,16,0.2722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,32,0.2848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,64,0.2737
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,128,0.2839
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,256,0.2878
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,1024,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,512,0.2718
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,4096,0.3004
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,2048,0.2798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,8192,0.3152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,16384,0.2979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,32768,0.3122
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,65536,0.3424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,2,0.2943
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,131072,0.3651
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,8,0.2968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,4,0.2917
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,16,0.3036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,32,0.2952
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,64,0.3016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,128,0.3081
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,256,0.2960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,512,0.2998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,1024,0.3089
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,4096,0.3217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,8192,0.3292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,2048,0.3010
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,16384,0.3285
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,32768,0.3471
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,65536,0.3646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,131072,0.4103
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,2,0.3084
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,4,0.3030
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,8,0.3020
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,16,0.3082
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,32,0.2981
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,64,0.3118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,128,0.3012
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,256,0.3013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,512,0.3136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,1024,0.3091
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,2048,0.3206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,4096,0.3314
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,8192,0.3486
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,16384,0.3313
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,32768,0.3508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,65536,0.3896
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,131072,0.4658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,2,0.3350
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,4,0.3293
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,8,0.3289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,16,0.3381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,32,0.3254
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,64,0.3298
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,128,0.3299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,256,0.3372
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,512,0.3395
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,2048,0.3429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,1024,0.3469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,4096,0.3655
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,8192,0.3725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,16384,0.3780
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,32768,0.4021
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,65536,0.4710
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,2,0.4199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,8,0.4131
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,4,0.4229
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,131072,0.5762
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,16,0.4153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,32,0.4139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,64,0.4168
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,128,0.4231
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,256,0.4179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,512,0.4254
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,1024,0.4247
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,2048,0.4447
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,4096,0.4649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,8192,0.4830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,16384,0.4919
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,32768,0.5481
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,65536,0.6397
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,2,0.5680
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,4,0.5755
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,8,0.5708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,131072,0.8379
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,16,0.5736
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,32,0.5722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,64,0.5776
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,128,0.5765
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,512,0.5874
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,256,0.5799
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,1024,0.6049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,2048,0.6330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,4096,0.6608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,8192,0.6943
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,16384,0.7492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,32768,0.8660
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,65536,1.0956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,2,0.8608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,8,0.8639
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,4,0.9212
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,131072,1.6462
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,16,0.8650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,32,0.9107
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,64,0.8676
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,128,0.8689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,256,0.8758
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,512,0.8906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,1024,0.9187
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,2048,0.9671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,4096,1.0244
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,8192,1.0883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,16384,1.1942
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,32768,1.4310
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,2,1.4946
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,65536,1.9381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,8,1.4983
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,16,1.4968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,4,1.4958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,32,1.5164
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,64,1.5024
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,128,1.5060
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,256,1.5240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,512,1.5465
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,2048,1.6896
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,1024,1.5947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,4096,1.7976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,2,0.2101
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,16384,2.1256
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,8192,1.9254
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,4,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,32768,2.5555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,8,0.2141
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,16,0.2107
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,64,0.2251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,32,0.2268
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,128,0.2103
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,256,0.2155
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,512,0.2239
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,1024,0.2156
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,2048,0.2148
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,4096,0.2485
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,16384,0.2416
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,32768,0.2289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,8192,0.2554
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,65536,0.2489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,131072,0.2897
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,2,0.3119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,4,0.2934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,8,0.3183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,16,0.3097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,32,0.2908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,64,0.2960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,128,0.3086
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,512,0.3214
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,1024,0.2933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,2048,0.2958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,4096,0.3317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,256,0.2987
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,8192,0.3148
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,16384,0.3351
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,32768,0.3316
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,65536,0.3382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,131072,0.3573
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,2,0.2366
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,8,0.2500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,4,0.2384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,16,0.2538
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,32,0.2573
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,64,0.2486
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,128,0.2407
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,256,0.2410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,512,0.2401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,2048,0.2532
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,1024,0.2444
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,4096,0.2626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,8192,0.2826
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,16384,0.2558
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,32768,0.2593
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,65536,0.2958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,131072,0.3182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,2,0.2458
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,4,0.2498
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,16,0.2605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,8,0.2575
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,32,0.2490
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,64,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,128,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,256,0.2586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,512,0.2607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,1024,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,2048,0.2554
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,4096,0.2712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,8192,0.2874
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,16384,0.2684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,32768,0.2754
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,65536,0.3120
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,131072,0.3476
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,2,0.2634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,4,0.2699
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,8,0.2674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,16,0.2755
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,32,0.2817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,64,0.2664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,128,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,256,0.2773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,512,0.2673
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,1024,0.2686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,2048,0.2780
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,4096,0.3048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,8192,0.2977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,16384,0.2928
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,32768,0.3122
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,65536,0.3433
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,131072,0.3958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,2,0.2685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,4,0.2695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,8,0.2665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,32,0.2708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,16,0.2692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,64,0.2755
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,128,0.2777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,256,0.2816
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,512,0.2705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,1024,0.2746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,2048,0.2803
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,4096,0.2975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,8192,0.2977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,16384,0.3080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,32768,0.3261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,65536,0.3672
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,131072,0.4247
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,2,0.2912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,4,0.2900
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,8,0.2944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,16,0.2903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,32,0.2887
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,128,0.3002
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,64,0.2977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,256,0.2902
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,512,0.2936
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,1024,0.2959
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,2048,0.3076
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,4096,0.3210
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,16384,0.3333
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,8192,0.3407
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,32768,0.3664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,65536,0.4212
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,131072,0.5296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,2,0.3572
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,4,0.3542
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,8,0.3616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,16,0.3544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,32,0.3513
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,64,0.3625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,128,0.3533
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,256,0.3557
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,512,0.3651
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,1024,0.3674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,2048,0.3772
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,4096,0.3996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,8192,0.4240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,16384,0.4274
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,32768,0.4768
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,65536,0.5769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,2,0.4704
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,131072,0.7766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,4,0.4690
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,16,0.4684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,8,0.4693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,32,0.4725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,64,0.4697
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,128,0.4729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,256,0.4763
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,512,0.4814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,1024,0.5010
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,2048,0.5230
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,4096,0.5589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,8192,0.5823
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,16384,0.6387
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,32768,0.7586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,2,0.6926
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,131072,1.5501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,65536,0.9912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,8,0.6957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,16,0.6980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,4,0.6919
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,32,0.6958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,64,0.7048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,128,0.7101
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,256,0.7129
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,512,0.7259
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,2048,0.8036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,1024,0.7752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,4096,0.8612
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,8192,0.9153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,16384,1.0304
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,32768,1.2650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,2,1.1845
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,65536,1.7634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,4,1.1965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,8,1.1864
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,16,1.3574
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,32,1.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,64,1.2002
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,128,1.2032
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,256,1.2095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,512,1.2375
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,1024,1.2934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,2048,1.3855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,2,0.2042
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,4096,1.4909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,16384,1.8151
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,8192,1.5923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,8,0.2054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,4,0.1958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,16,0.1957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,32768,2.2498
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,32,0.1987
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,64,0.2072
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,128,0.1944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,256,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,512,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,1024,0.1939
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,2048,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,8192,0.2213
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,4096,0.2285
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,32768,0.2137
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,16384,0.2252
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,65536,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,131072,0.2569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,2,0.2923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,4,0.2783
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,8,0.2794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,16,0.3089
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,32,0.2712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,64,0.2732
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,128,0.2979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,256,0.2905
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,512,0.2743
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,1024,0.2821
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,2048,0.2846
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,4096,0.3175
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,8192,0.2970
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,16384,0.3080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,32768,0.2948
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,65536,0.3227
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,131072,0.3710
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,2,0.2206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,4,0.2402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,8,0.2217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,16,0.2205
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,32,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,64,0.2330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,128,0.2406
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,256,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,512,0.2362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,1024,0.2225
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,2048,0.2272
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,4096,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,8192,0.2520
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,16384,0.2598
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,32768,0.2564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,65536,0.2650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,131072,0.3001
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,4,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,2,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,8,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,16,0.2311
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,32,0.2495
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,64,0.2411
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,128,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,256,0.2341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,512,0.2322
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,1024,0.2462
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,2048,0.2394
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,4096,0.2714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,8192,0.2701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,16384,0.2529
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,32768,0.2612
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,65536,0.2824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,131072,0.3415
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,2,0.2489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,4,0.2521
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,8,0.2594
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,16,0.2589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,32,0.2508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,64,0.2508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,128,0.2475
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,256,0.2674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,512,0.2515
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,1024,0.2539
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,2048,0.2685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,4096,0.2746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,8192,0.2886
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,16384,0.2769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,32768,0.2835
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,65536,0.3260
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,131072,0.3652
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,2,0.2480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,4,0.2490
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,8,0.2565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,16,0.2547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,32,0.2500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,64,0.2458
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,128,0.2609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,256,0.2495
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,512,0.2558
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,1024,0.2547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,2048,0.2609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,4096,0.2856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,8192,0.2838
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,16384,0.2794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,32768,0.3087
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,65536,0.3376
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,131072,0.4122
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,2,0.2606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,4,0.2647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,8,0.2691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,16,0.2648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,32,0.2600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,64,0.2707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,128,0.2641
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,256,0.2692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,512,0.2753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,1024,0.2685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,2048,0.2757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,4096,0.3034
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,8192,0.3039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,16384,0.3084
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,32768,0.3346
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,65536,0.3989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,131072,0.5116
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,2,0.3149
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,4,0.3125
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,8,0.3156
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,16,0.3126
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,32,0.3206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,64,0.3153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,128,0.3182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,256,0.3236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,512,0.3205
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,1024,0.3246
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,2048,0.3391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,4096,0.3589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,8192,0.3845
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,16384,0.3908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,32768,0.4410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,65536,0.5427
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,131072,0.7351
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,2,0.4083
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,4,0.4094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,8,0.4065
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,16,0.4139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,32,0.4083
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,64,0.4089
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,128,0.4174
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,256,0.4144
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,512,0.4188
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,2048,0.4561
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,1024,0.4329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,16384,0.5779
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,4096,0.4903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,8192,0.5242
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,32768,0.6978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,65536,0.9304
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,2,0.5696
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,4,0.5659
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,8,0.5701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,16,0.5697
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,32,0.5724
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,64,0.6532
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,131072,1.4856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,128,0.6373
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,256,0.5838
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,512,0.5944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,2048,0.6699
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,1024,0.6227
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,4096,0.7358
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,8192,0.7895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,32768,1.1330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,16384,0.9016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,2,0.9363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,65536,1.6359
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,4,0.9381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,16,0.9454
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,8,1.0947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,32,0.9554
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,64,0.9531
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,128,0.9618
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,256,0.9683
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,512,0.9942
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,2048,1.1426
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,1024,1.0445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,4096,1.2475
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,16384,1.5771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,8192,1.3575
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,32768,1.9934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,2,0.2617
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,4,0.2693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,8,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,16,0.2489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,32,0.2625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,64,0.2508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,256,0.2508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,512,0.2523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,128,0.2597
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,2048,0.2514
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,1024,0.2693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,4096,0.2793
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,8192,0.2688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,16384,0.2907
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,32768,0.2705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,65536,0.2930
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,131072,0.3319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,2,0.3373
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,4,0.3561
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,8,0.3543
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,16,0.3388
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,32,0.3340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,64,0.3619
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,128,0.3640
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,256,0.3324
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,512,0.3381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,1024,0.3567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,2048,0.3560
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,4096,0.3531
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,8192,0.3518
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,16384,0.3762
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,32768,0.3582
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,65536,0.3714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,2,0.2888
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,131072,0.4198
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,4,0.2966
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,8,0.2820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,16,0.2808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,32,0.3005
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,64,0.2840
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,128,0.2768
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,256,0.2944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,512,0.2866
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,1024,0.2797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,2048,0.2958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,4096,0.2985
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,8192,0.3224
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,16384,0.3104
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,32768,0.3034
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,65536,0.3203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,2,0.2988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,131072,0.3551
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,4,0.3138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,8,0.2978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,16,0.3136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,32,0.3117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,64,0.2998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,128,0.2967
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,256,0.2975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,1024,0.3091
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,512,0.2981
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,2048,0.3143
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,4096,0.3114
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,8192,0.3310
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,16384,0.3152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,32768,0.3228
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,65536,0.3445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,131072,0.3803
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,2,0.3317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,4,0.3336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,8,0.3194
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,16,0.3289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,32,0.3196
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,128,0.3215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,256,0.3204
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,64,0.3217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,512,0.3307
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,1024,0.3363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,2048,0.3186
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,4096,0.3441
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,8192,0.3416
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,16384,0.3397
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,65536,0.3814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,32768,0.3498
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,2,0.3474
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,131072,0.4400
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,4,0.3351
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,8,0.3448
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,16,0.3366
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,32,0.3381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,128,0.3366
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,64,0.3485
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,256,0.3382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,512,0.3466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,2048,0.3467
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,1024,0.3391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,4096,0.3563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,8192,0.3653
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,16384,0.3750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,32768,0.3808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,65536,0.4177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,131072,0.4961
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,2,0.4000
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,4,0.3927
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,8,0.3939
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,16,0.4024
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,32,0.3936
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,64,0.3943
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,256,0.4005
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,128,0.3909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,1024,0.3998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,512,0.3966
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,2048,0.3963
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,4096,0.4228
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,8192,0.4273
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,16384,0.4358
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,32768,0.4575
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,65536,0.5132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,131072,0.6237
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,4,0.5026
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,2,0.4962
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,8,0.5048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,16,0.4994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,32,0.5014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,64,0.4988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,128,0.5007
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,256,0.4964
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,1024,0.5079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,512,0.4967
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,2048,0.5133
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,4096,0.5277
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,8192,0.5463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,16384,0.5575
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,32768,0.6054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,65536,0.6977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,2,0.7455
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,4,0.7515
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,131072,0.8999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,16,0.7457
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,8,0.7456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,32,0.7444
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,64,0.7497
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,128,0.7516
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,256,0.7602
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,512,0.7628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,1024,0.7610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,2048,0.7701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,4096,0.7946
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,8192,0.8228
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,16384,0.8812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,32768,0.9941
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,65536,1.2366
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,2,1.1974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,131072,1.7869
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,4,1.2026
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,8,1.1985
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,16,1.1961
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,32,1.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,64,1.2041
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,128,1.2090
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,256,1.2134
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,512,1.2136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,2048,1.2442
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,4096,1.2891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,1024,1.2261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,8192,1.3453
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,16384,1.4564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,32768,1.6896
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,65536,2.2031
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,2,2.0391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,4,2.0576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,8,2.0465
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,16,2.0458
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,32,2.0502
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,64,2.0464
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,128,2.0478
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,256,2.0514
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,512,2.0567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,2048,2.1195
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,2,0.2070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,4096,2.1921
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,16384,2.5130
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,1024,2.0754
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,4,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,8192,2.2957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,8,0.2120
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,16,0.2075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,32768,2.9511
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,32,0.2124
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,64,0.2068
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,128,0.2123
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,256,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,512,0.2059
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,1024,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,2048,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,4096,0.2353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,8192,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,16384,0.2206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,65536,0.2606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,32768,0.2296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,131072,0.2676
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,2,0.2929
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,4,0.3123
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,8,0.3142
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,16,0.3100
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,64,0.2874
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,32,0.2865
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,128,0.2933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,256,0.2920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,512,0.2944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,1024,0.3115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,2048,0.3103
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,4096,0.3276
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,8192,0.3084
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,16384,0.3013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,32768,0.3099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,65536,0.3292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,131072,0.3596
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,2,0.2510
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,4,0.2538
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,8,0.2524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,16,0.2372
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,32,0.2360
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,64,0.2360
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,128,0.2381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,256,0.2528
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,512,0.2504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,2048,0.2404
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,1024,0.2352
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,4096,0.2692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,8192,0.2577
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,16384,0.2676
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,32768,0.2570
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,65536,0.2913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,131072,0.3084
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,2,0.2579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,4,0.2454
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,8,0.2421
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,16,0.2456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,32,0.2594
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,64,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,128,0.2548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,256,0.2583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,512,0.2423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,1024,0.2470
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,2048,0.2497
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,4096,0.2636
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,8192,0.2794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,16384,0.2612
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,32768,0.2772
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,65536,0.3044
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,2,0.2678
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,131072,0.3278
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,4,0.2683
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,8,0.2668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,16,0.2821
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,32,0.2691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,64,0.2743
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,128,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,256,0.2796
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,512,0.2677
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,1024,0.2715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,2048,0.2695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,4096,0.2828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,8192,0.2978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,16384,0.2976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,32768,0.3111
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,65536,0.3255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,131072,0.3738
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,2,0.2739
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,4,0.2694
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,8,0.2723
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,16,0.2783
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,32,0.2812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,64,0.2854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,128,0.2733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,256,0.2724
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,512,0.2738
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,1024,0.2694
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,2048,0.2758
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,4096,0.2981
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,8192,0.3074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,16384,0.3079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,32768,0.3134
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,131072,0.4209
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,65536,0.3511
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,2,0.3023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,4,0.3031
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,8,0.3088
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,16,0.3108
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,32,0.3094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,64,0.3024
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,256,0.3029
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,128,0.3020
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,512,0.3087
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,1024,0.3137
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,2048,0.3040
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,16384,0.3439
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,8192,0.3337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,4096,0.3223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,32768,0.3649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,65536,0.4213
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,2,0.3618
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,131072,0.5399
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,4,0.3559
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,8,0.3591
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,16,0.3623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,32,0.3552
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,64,0.3590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,128,0.3574
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,256,0.3658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,512,0.3622
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,1024,0.3591
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,2048,0.3617
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,4096,0.3896
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,8192,0.3960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,32768,0.4607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,16384,0.4204
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,65536,0.5606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,2,0.4883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,131072,0.7534
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,4,0.4868
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,8,0.4918
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,16,0.4932
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,32,0.4871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,64,0.4925
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,128,0.4895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,256,0.4914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,512,0.4923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,1024,0.4940
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,2048,0.5048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,4096,0.5311
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,16384,0.6108
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,8192,0.5571
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,32768,0.7346
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,65536,0.9655
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,131072,1.5217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,2,0.7266
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,4,0.7272
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,8,0.7301
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,32,0.7329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,16,0.7245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,64,0.7344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,128,0.7416
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,256,0.7433
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,2048,0.7685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,512,0.7485
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,1024,0.7497
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,4096,0.8175
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,8192,0.8676
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,16384,0.9839
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,32768,1.2150
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,2,1.1881
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,4,1.1994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,65536,1.7204
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,16,1.1906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,8,1.1849
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,32,1.1895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,64,1.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,128,1.1929
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,256,1.1985
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,2048,1.2537
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,512,1.2072
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,4096,1.3293
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,1024,1.2211
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,8192,1.4362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,16384,1.6540
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,2,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,4,0.1956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,8,0.2048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,32768,2.0913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,16,0.1917
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,32,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,64,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,128,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,256,0.1964
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,512,0.2054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,1024,0.1976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,2048,0.1916
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,4096,0.2114
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,8192,0.2267
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,16384,0.2095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,32768,0.2114
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,131072,0.2710
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,65536,0.2426
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,2,0.2781
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,4,0.2765
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,8,0.2817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,16,0.2974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,32,0.2788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,64,0.2996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,128,0.2747
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,256,0.2725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,512,0.2996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,1024,0.2802
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,2048,0.2744
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,4096,0.3132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,8192,0.3014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,16384,0.2858
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,32768,0.3198
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,65536,0.3119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,131072,0.3639
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,2,0.2243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,4,0.2401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,8,0.2218
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,16,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,32,0.2237
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,128,0.2385
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,64,0.2382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,512,0.2252
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,1024,0.2267
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,2048,0.2232
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,256,0.2384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,4096,0.2407
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,8192,0.2482
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,16384,0.2547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,32768,0.2454
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,65536,0.2632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,131072,0.3105
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,2,0.2346
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,4,0.2353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,8,0.2324
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,16,0.2469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,32,0.2504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,128,0.2364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,64,0.2344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,256,0.2333
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,512,0.2485
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,1024,0.2384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,2048,0.2326
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,4096,0.2643
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,8192,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,16384,0.2509
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,32768,0.2589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,65536,0.2794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,2,0.2630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,131072,0.3210
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,4,0.2507
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,8,0.2705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,16,0.2640
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,32,0.2570
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,64,0.2569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,128,0.2582
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,256,0.2540
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,512,0.2669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,2048,0.2527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,1024,0.2701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,4096,0.2797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,8192,0.2795
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,16384,0.2733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,32768,0.2842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,65536,0.3188
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,131072,0.3736
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,2,0.2537
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,4,0.2605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,8,0.2592
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,16,0.2657
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,32,0.2695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,64,0.2574
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,128,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,256,0.2545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,512,0.2589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,1024,0.2602
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,2048,0.2733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,4096,0.2839
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,8192,0.2861
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,16384,0.2847
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,32768,0.2988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,65536,0.3350
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,131072,0.4172
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,4,0.2943
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,8,0.2926
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,2,0.2820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,16,0.2841
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,32,0.2840
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,64,0.2932
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,128,0.2850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,256,0.2817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,512,0.2915
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,1024,0.2921
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,2048,0.2872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,4096,0.3054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,8192,0.3157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,16384,0.3301
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,32768,0.3480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,65536,0.4055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,131072,0.5242
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,2,0.3372
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,4,0.3328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,8,0.3288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,16,0.3390
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,32,0.3310
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,64,0.3319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,128,0.3291
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,256,0.3330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,512,0.3378
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,1024,0.3414
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,2048,0.3330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,4096,0.3653
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,8192,0.3734
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,16384,0.3881
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,32768,0.4336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,65536,0.5383
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,2,0.4512
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,131072,0.7311
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,8,0.4452
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,4,0.4535
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,16,0.4457
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,32,0.4495
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,64,0.4492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,128,0.4512
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,256,0.4567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,512,0.4558
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,2048,0.4608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,1024,0.4527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,4096,0.4863
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,16384,0.5761
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,8192,0.5178
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,32768,0.6951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,65536,0.9293
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,4,0.6473
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,2,0.6510
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,8,0.6475
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,131072,1.4867
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,16,0.6523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,32,0.6547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,64,0.6550
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,128,0.6631
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,256,0.6622
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,512,0.6616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,1024,0.6722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,2048,0.6849
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,4096,0.7365
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,16384,0.9035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,8192,0.7912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,32768,1.1384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,2,1.0441
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,4,1.0480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,8,1.0505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,65536,1.6389
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,16,1.0527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,32,1.0559
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,64,1.0618
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,128,1.0633
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,256,1.0595
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,512,1.0682
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,2048,1.1132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,16384,1.5217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,1024,1.0819
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,2,0.1879
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,4096,1.1989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,8192,1.3071
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,4,0.1819
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,32768,1.9503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,8,0.1870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,16,0.1806
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,32,0.1881
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,64,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,256,0.1764
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,128,0.1815
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,512,0.1879
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,1024,0.1956
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,2048,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,4096,0.2067
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,8192,0.2108
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,16384,0.1925
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,65536,0.2119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,32768,0.2082
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,131072,0.2419
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,2,0.2475
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,4,0.2792
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,8,0.2571
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,16,0.2670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,32,0.2426
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,64,0.2610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,128,0.2658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,256,0.2547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,512,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,1024,0.2733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,2048,0.2531
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,4096,0.2914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,16384,0.2775
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,8192,0.2647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,32768,0.2680
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,65536,0.2968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,131072,0.3565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,2,0.2275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,4,0.2095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,8,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,16,0.2112
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,64,0.2280
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,128,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,256,0.2137
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,512,0.2073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,1024,0.2319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,2048,0.2283
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,32,0.2131
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,4096,0.2454
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,16384,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,8192,0.2510
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,32768,0.2366
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,65536,0.2582
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,131072,0.2890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,2,0.2365
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,4,0.2206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,8,0.2374
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,16,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,32,0.2239
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,64,0.2206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,128,0.2244
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,256,0.2186
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,512,0.2364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,1024,0.2377
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,2048,0.2233
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,4096,0.2424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,8192,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,16384,0.2410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,32768,0.2507
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,65536,0.2857
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,131072,0.3104
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,2,0.2564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,4,0.2418
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,8,0.2533
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,16,0.2409
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,32,0.2414
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,64,0.2518
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,128,0.2425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,256,0.2469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,512,0.2570
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,1024,0.2437
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,2048,0.2553
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,4096,0.2609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,8192,0.2795
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,16384,0.2794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,32768,0.2757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,65536,0.3041
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,131072,0.3550
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,2,0.2452
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,4,0.2558
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,8,0.2524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,16,0.2546
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,64,0.2435
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,32,0.2432
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,128,0.2480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,256,0.2419
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,512,0.2466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,1024,0.2592
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,2048,0.2577
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,4096,0.2677
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,16384,0.2813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,8192,0.2753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,32768,0.2895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,65536,0.3299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,131072,0.3947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,2,0.2772
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,8,0.2681
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,4,0.2713
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,16,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,32,0.2677
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,64,0.2692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,128,0.2650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,256,0.2791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,512,0.2708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,1024,0.2737
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,4096,0.3003
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,8192,0.3024
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,16384,0.3161
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,2048,0.2776
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,32768,0.3320
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,65536,0.3908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,131072,0.5044
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,2,0.3096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,4,0.3139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,16,0.3153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,8,0.3081
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,32,0.3073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,64,0.3068
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,128,0.3143
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,256,0.3121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,512,0.3215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,2048,0.3182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,1024,0.3132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,4096,0.3440
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,16384,0.3639
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,8192,0.3512
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,32768,0.4124
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,65536,0.5168
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,131072,0.7157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,2,0.4158
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,4,0.4158
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,8,0.4195
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,16,0.4154
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,32,0.4155
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,64,0.4181
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,128,0.4219
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,256,0.4203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,512,0.4275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,1024,0.4254
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,2048,0.4318
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,4096,0.4541
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,16384,0.5425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,8192,0.4886
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,32768,0.6665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,65536,0.8996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,131072,1.4586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,2,0.5934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,8,0.5951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,16,0.5946
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,32,0.5989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,64,0.5998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,4,0.5898
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,128,0.6038
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,256,0.6033
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,512,0.6097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,2048,0.6296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,1024,0.6151
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,4096,0.6777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,8192,0.7360
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,32768,1.0818
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,16384,0.8410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,65536,1.5853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,4,0.9312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,2,0.9321
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,8,0.9281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,16,0.9348
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,32,0.9346
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,64,0.9352
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,128,0.9333
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,256,0.9368
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,2048,0.9843
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,512,0.9504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,2,0.1718
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,4096,1.0797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,1024,0.9611
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,4,0.1660
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,8,0.1836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,16,0.1864
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,8192,1.1798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,16384,1.4003
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,32,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,64,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,128,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,256,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,32768,1.8259
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,512,0.1868
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,2048,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,1024,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,4096,0.1778
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,16384,0.1814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,8192,0.1939
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,32768,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,131072,0.2440
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,65536,0.2039
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,2,0.2357
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,4,0.2719
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,8,0.2634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,16,0.2344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,32,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,64,0.2539
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,128,0.2337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,256,0.2449
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,1024,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,2048,0.2385
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,4096,0.2478
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,8192,0.2675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,512,0.2742
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,16384,0.2707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,32768,0.2530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,65536,0.3210
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,2,0.2017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,4,0.2037
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,8,0.2073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,16,0.2210
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,131072,0.3075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,32,0.2177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,64,0.2076
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,128,0.2160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,256,0.2022
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,512,0.2050
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,1024,0.2257
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,2048,0.2080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,4096,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,8192,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,16384,0.2223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,32768,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,65536,0.2491
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,131072,0.2998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,2,0.2102
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,4,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,8,0.2246
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,16,0.2160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,32,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,64,0.2126
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,128,0.2122
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,256,0.2305
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,512,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,1024,0.2127
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,2048,0.2262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,4096,0.2393
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,8192,0.2392
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,16384,0.2503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,32768,0.2426
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,65536,0.2657
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,131072,0.3057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,2,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,4,0.2448
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,8,0.2492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,16,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,32,0.2392
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,64,0.2358
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,128,0.2363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,256,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,512,0.2358
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,1024,0.2513
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,2048,0.2494
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,4096,0.2544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,8192,0.2640
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,16384,0.2716
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,32768,0.2708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,65536,0.2996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,131072,0.3637
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,2,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,4,0.2485
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,8,0.2393
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,16,0.2384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,32,0.2416
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,64,0.2464
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,128,0.2326
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,256,0.2510
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,512,0.2502
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,1024,0.2424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,2048,0.2467
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,4096,0.2633
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,8192,0.2725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,16384,0.2773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,32768,0.2999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,65536,0.3201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,131072,0.4020
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,2,0.2623
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,4,0.2615
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,8,0.2595
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,16,0.2714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,32,0.2606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,64,0.2677
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,128,0.2670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,256,0.2582
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,512,0.2641
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,1024,0.2664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,2048,0.2692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,4096,0.2957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,16384,0.3021
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,8192,0.3024
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,32768,0.3352
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,65536,0.3895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,2,0.2921
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,131072,0.4989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,4,0.2948
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,8,0.3031
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,16,0.2999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,32,0.2953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,64,0.2955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,128,0.3030
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,256,0.3005
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,512,0.2994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,2048,0.3152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,1024,0.3050
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,4096,0.3337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,8192,0.3440
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,16384,0.3547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,32768,0.4044
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,65536,0.5029
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,2,0.4041
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,131072,0.7021
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,4,0.3994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,8,0.4032
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,16,0.3983
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,32,0.3989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,64,0.4018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,128,0.4072
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,256,0.4025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,512,0.4053
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,1024,0.4120
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,2048,0.4177
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,4096,0.4410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,32768,0.6515
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,16384,0.5339
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,8192,0.4662
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,65536,0.8855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,2,0.5681
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,4,0.5665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,131072,1.4333
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,8,0.5649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,16,0.5689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,64,0.5734
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,32,0.5679
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,128,0.5754
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,256,0.5765
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,512,0.5820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,2048,0.5995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,1024,0.5838
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,16384,0.8206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,8192,0.7019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,4096,0.6526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,32768,1.0501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,2,0.8712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,4,0.8696
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,65536,1.5567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,16,0.8733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,8,0.8686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,32,0.8765
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,64,0.8751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,128,0.8778
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,256,0.8808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,512,0.8843
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,2,0.1754
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,1024,0.9011
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,4096,1.0154
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,2048,0.9322
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,4,0.1761
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,8192,1.1234
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,8,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,16384,1.3437
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,32768,1.7756
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,16,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,64,0.1757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,128,0.1669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,256,0.1645
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,512,0.1666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,32,0.1726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,1024,0.1695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,2048,0.1669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,4096,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,8192,0.1958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,16384,0.1783
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,32768,0.1964
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,65536,0.1998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,131072,0.2302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,2,0.2382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,4,0.2456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,8,0.2485
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,16,0.2313
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,64,0.2527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,32,0.2577
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,128,0.2285
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,256,0.2312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,512,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,1024,0.2337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,2048,0.2502
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,4096,0.2505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,8192,0.2726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,16384,0.2788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,32768,0.2486
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,65536,0.2791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,2,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,131072,0.3201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,4,0.2116
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,8,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,16,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,32,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,64,0.2002
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,128,0.1941
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,256,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,512,0.2201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,1024,0.2123
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,2048,0.1973
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,4096,0.2257
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,8192,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,16384,0.2154
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,32768,0.2184
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,65536,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,131072,0.3009
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,2,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,4,0.2211
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,8,0.2235
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,16,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,32,0.2079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,64,0.2119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,128,0.2246
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,256,0.2057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,512,0.2080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,1024,0.2218
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,2048,0.2228
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,4096,0.2251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,8192,0.2319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,16384,0.2425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,32768,0.2359
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,65536,0.2681
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,2,0.2299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,4,0.2296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,8,0.2425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,16,0.2305
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,131072,0.3138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,32,0.2470
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,64,0.2259
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,128,0.2300
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,256,0.2309
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,512,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,1024,0.2429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,2048,0.2517
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,4096,0.2492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,8192,0.2664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,16384,0.2565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,32768,0.2646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,65536,0.2967
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,131072,0.3589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,2,0.2348
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,4,0.2435
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,8,0.2327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,16,0.2423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,32,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,64,0.2315
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,128,0.2422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,256,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,512,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,1024,0.2378
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,2048,0.2411
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,4096,0.2657
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,8192,0.2654
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,16384,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,32768,0.2908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,65536,0.3202
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,131072,0.3833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,2,0.2555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,4,0.2555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,8,0.2611
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,16,0.2535
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,32,0.2672
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,64,0.2627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,128,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,256,0.2569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,512,0.2548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,2048,0.2650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,1024,0.2629
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,4096,0.2886
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,8192,0.3035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,16384,0.2978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,32768,0.3241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,65536,0.3900
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,2,0.2957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,131072,0.4950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,4,0.2869
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,8,0.2870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,16,0.2961
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,32,0.2896
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,64,0.2962
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,256,0.2916
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,128,0.2913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,512,0.2957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,1024,0.3065
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,2048,0.2995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,4096,0.3325
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,16384,0.3534
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,8192,0.3449
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,32768,0.3991
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,65536,0.4971
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,131072,0.6994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,2,0.3969
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,4,0.3951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,8,0.3875
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,16,0.3907
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,32,0.3925
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,64,0.3947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,128,0.3997
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,256,0.3949
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,2048,0.4125
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,512,0.3979
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,1024,0.3977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,4096,0.4337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,8192,0.4626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,16384,0.5231
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,32768,0.6452
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,65536,0.8765
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,2,0.5520
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,4,0.5499
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,8,0.5500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,16,0.5516
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,131072,1.4356
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,32,0.5536
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,64,0.5613
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,128,0.5606
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,256,0.5632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,512,0.5612
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,2048,0.5864
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,1024,0.5718
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,4096,0.6373
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,16384,0.8098
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,8192,0.6934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,2,0.8408
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,32768,1.0374
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,4,0.8396
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,8,0.8386
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,65536,1.5411
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,16,0.8428
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,32,0.8466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,64,0.8481
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,128,0.8470
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,256,0.8527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,2048,0.8997
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,16384,1.3136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,512,0.8624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,4096,0.9842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,2,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,8192,1.0935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,1024,0.8711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,4,0.1649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,8,0.1759
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,16,0.1658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,32,0.1787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,64,0.1649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,128,0.1649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,256,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,512,0.1644
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,2048,0.1666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,4096,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,1024,0.1686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,8192,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,32768,1.7452
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,32768,0.1820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,65536,0.1998
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,131072,0.2251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,16384,0.1895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,2,0.2527
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,4,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,16,0.2307
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,32,0.2386
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,64,0.2424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,256,0.2280
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,128,0.2517
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,8,0.2243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,512,0.2444
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,1024,0.2330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,2048,0.2305
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,4096,0.2653
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,8192,0.2484
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,16384,0.2415
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,32768,0.2648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,65536,0.2639
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,131072,0.3103
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,2,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,4,0.2162
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,8,0.1952
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,16,0.1940
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,32,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,64,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,128,0.1929
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,256,0.1927
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,512,0.2176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,1024,0.2096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,2048,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,4096,0.2063
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,8192,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,16384,0.2227
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,32768,0.2142
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,65536,0.2371
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,131072,0.2974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,2,0.2047
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,4,0.2038
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,8,0.2215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,16,0.2210
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,64,0.2098
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,32,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,128,0.2233
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,256,0.2064
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,512,0.2069
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,1024,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,2048,0.2192
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,4096,0.2370
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,8192,0.2275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,16384,0.2303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,32768,0.2516
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,65536,0.2632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,131072,0.3009
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,4,0.2409
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,8,0.2272
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,16,0.2257
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,32,0.2286
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,2,0.2441
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,64,0.2403
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,128,0.2253
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,256,0.2380
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,1024,0.2285
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,2048,0.2275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,4096,0.2486
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,512,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,8192,0.2529
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,16384,0.2666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,32768,0.2781
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,65536,0.3062
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,131072,0.3433
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,2,0.2319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,4,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,8,0.2277
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,16,0.2433
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,32,0.2267
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,64,0.2414
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,128,0.2383
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,256,0.2305
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,512,0.2439
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,2048,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,1024,0.2322
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,4096,0.2489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,8192,0.2712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,16384,0.2737
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,32768,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,65536,0.3172
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,131072,0.4007
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,2,0.2490
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,4,0.2511
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,8,0.2502
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,16,0.2588
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,32,0.2621
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,64,0.2512
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,128,0.2501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,256,0.2619
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,512,0.2533
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,2048,0.2680
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,1024,0.2593
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,4096,0.2751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,16384,0.3024
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,8192,0.2911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,32768,0.3221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,65536,0.3894
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,131072,0.4899
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,2,0.2915
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,8,0.2834
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,16,0.2859
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,32,0.2835
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,128,0.2872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,4,0.2901
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,64,0.2936
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,256,0.2876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,512,0.2978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,2048,0.3001
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,1024,0.2962
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,4096,0.3197
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,8192,0.3349
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,16384,0.3507
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,32768,0.4022
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,65536,0.5028
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,131072,0.6924
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,4,0.3871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,2,0.3880
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,8,0.3841
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,16,0.3848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,32,0.3874
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,64,0.3940
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,128,0.3959
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,256,0.3921
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,512,0.3950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,1024,0.3968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,2048,0.4018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,4096,0.4277
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,16384,0.5224
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,8192,0.4576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,32768,0.6399
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,65536,0.8696
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,2,0.5455
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,4,0.5410
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,131072,1.4301
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,8,0.5423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,16,0.5439
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,32,0.5493
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,64,0.5536
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,128,0.5557
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,256,0.5568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,512,0.5544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,2048,0.5786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,4096,0.6289
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,1024,0.5664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,8192,0.6861
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,16384,0.8005
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,32768,1.0382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,65536,1.5377
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,2,0.8255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,4,0.8256
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,8,0.8287
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,32,0.8321
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,16,0.8266
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,64,0.8345
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,128,0.8324
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,256,0.8371
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,512,0.8417
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,1024,0.8605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,2048,0.8897
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,4096,0.9760
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,2,0.1625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,4,0.1688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,32768,1.7356
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,8192,1.0808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,16384,1.3003
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,8,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,16,0.1617
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,32,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,64,0.1647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,128,0.1686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,256,0.1780
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,512,0.1616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,2048,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,1024,0.1767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,4096,0.1886
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,8192,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,16384,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,32768,0.1942
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,65536,0.2018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,131072,0.2247
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,2,0.2191
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,4,0.2509
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,8,0.2504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,32,0.2280
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,16,0.2433
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,64,0.2352
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,128,0.2334
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,256,0.2276
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,512,0.2212
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,1024,0.2504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,2048,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,4096,0.2467
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,8192,0.2566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,16384,0.2423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,32768,0.2568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,65536,0.2747
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,131072,0.2905
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,2,0.2079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,4,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,8,0.1973
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,16,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,32,0.2023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,64,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,128,0.1899
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,256,0.1875
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,512,0.2085
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,1024,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,2048,0.1936
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,4096,0.2038
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,8192,0.2142
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,32768,0.2142
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,16384,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,65536,0.2453
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,131072,0.2895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,2,0.2158
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,4,0.2004
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,8,0.1976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,16,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,32,0.2053
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,64,0.2018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,128,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,256,0.2151
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,512,0.2155
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,1024,0.2186
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,2048,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,8192,0.2275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,16384,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,32768,0.2339
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,65536,0.2558
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,4096,0.2164
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,131072,0.3103
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,2,0.2421
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,4,0.2243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,8,0.2371
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,16,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,32,0.2235
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,64,0.2368
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,128,0.2391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,256,0.2222
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,512,0.2252
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,1024,0.2287
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,2048,0.2401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,4096,0.2604
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,16384,0.2504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,8192,0.2504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,32768,0.2631
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,65536,0.3054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,131072,0.3412
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,2,0.2362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,4,0.2384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,8,0.2293
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,16,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,32,0.2351
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,64,0.2255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,128,0.2243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,256,0.2267
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,512,0.2423
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,1024,0.2408
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,2048,0.2320
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,4096,0.2551
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,8192,0.2699
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,16384,0.2596
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,32768,0.2731
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,65536,0.3284
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,131072,0.3844
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,2,0.2475
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,4,0.2546
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,8,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,16,0.2498
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,32,0.2559
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,64,0.2450
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,128,0.2608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,256,0.2492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,512,0.2507
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,1024,0.2632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,2048,0.2705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,4096,0.2785
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,8192,0.2974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,16384,0.2895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,32768,0.3185
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,65536,0.3786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,131072,0.4903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,2,0.2877
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,4,0.2920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,8,0.2823
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,16,0.2815
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,64,0.2799
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,128,0.2814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,256,0.2836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,32,0.2887
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,512,0.2961
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,1024,0.3038
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,2048,0.2972
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,4096,0.3169
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,8192,0.3325
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,16384,0.3474
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,32768,0.3920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,65536,0.5002
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,2,0.3907
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,131072,0.7011
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,4,0.3836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,8,0.3831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,16,0.3854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,32,0.3848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,128,0.3921
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,64,0.3830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,256,0.3953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,512,0.3968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,2048,0.4016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,1024,0.3952
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,4096,0.4268
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,8192,0.4531
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,16384,0.5171
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,32768,0.6395
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,65536,0.8660
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,2,0.5385
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,4,0.5387
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,8,0.5391
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,131072,1.4298
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,16,0.5411
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,32,0.5404
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,64,0.5495
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,128,0.5480
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,256,0.5513
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,512,0.5541
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,2048,0.5739
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,1024,0.5618
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,4096,0.6262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,16384,0.7954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,8192,0.6791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,32768,1.0216
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,65536,1.5320
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,2,0.8182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,4,0.8202
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,8,0.8229
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,16,0.8238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,32,0.8238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,64,0.8285
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,128,0.8237
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,256,0.8280
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,512,0.8376
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,2048,0.8782
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,2,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,1024,0.8521
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,4096,0.9711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,4,0.2343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,8,0.2227
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,8192,1.0715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,16,0.2217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,32768,1.7161
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,16384,1.2980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,32,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,64,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,128,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,256,0.2231
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,512,0.2226
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,1024,0.2255
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,2048,0.2322
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,4096,0.2561
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,8192,0.2590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,16384,0.2376
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,32768,0.2424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,65536,0.2590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,131072,0.3013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,2,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,4,0.2465
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,8,0.2401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,16,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,32,0.2350
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,64,0.2328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,128,0.2339
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,256,0.2729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,512,0.2465
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,1024,0.2369
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,4096,0.2524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,8192,0.2626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,2048,0.2453
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,16384,0.2495
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,32768,0.2549
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,65536,0.2842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,131072,0.3176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,2,0.2411
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,4,0.2363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,8,0.2433
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,32,0.2470
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,16,0.2393
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,64,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,128,0.2499
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,256,0.2418
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,512,0.2387
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,1024,0.2494
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,2048,0.2474
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,4096,0.2695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,8192,0.2647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,16384,0.2647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,32768,0.2607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,65536,0.2794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,131072,0.3168
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,2,0.2576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,4,0.2508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,8,0.2544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,16,0.2467
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,32,0.2476
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,64,0.2541
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,128,0.2449
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,256,0.2489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,512,0.2528
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,1024,0.2580
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,2048,0.2602
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,4096,0.2691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,8192,0.2736
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,16384,0.2764
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,32768,0.2718
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,65536,0.2993
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,2,0.2710
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,131072,0.3491
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,8,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,4,0.2743
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,16,0.2672
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,32,0.2736
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,64,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,256,0.2715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,128,0.2751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,512,0.2692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,1024,0.2680
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,2048,0.2739
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,4096,0.2992
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,8192,0.3035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,32768,0.3070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,16384,0.3018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,65536,0.3310
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,131072,0.3806
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,2,0.2709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,4,0.2707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,16,0.2744
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,8,0.2780
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,32,0.2771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,64,0.2702
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,128,0.2716
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,256,0.2699
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,512,0.2770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,1024,0.2753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,2048,0.2880
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,4096,0.3017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,8192,0.3116
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,16384,0.3036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,32768,0.3176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,131072,0.4312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,65536,0.3574
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,2,0.2934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,4,0.2984
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,8,0.2936
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,16,0.2983
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,32,0.3014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,64,0.2968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,128,0.2914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,256,0.2978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,512,0.2958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,1024,0.3046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,2048,0.3061
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,4096,0.3295
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,8192,0.3344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,16384,0.3447
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,32768,0.3632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,65536,0.4212
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,2,0.3592
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,131072,0.5379
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,4,0.3616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,8,0.3599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,16,0.3590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,32,0.3777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,64,0.3584
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,128,0.3578
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,256,0.3599
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,512,0.3662
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,1024,0.3758
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,2048,0.3857
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,4096,0.4081
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,8192,0.4278
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,16384,0.4358
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,65536,0.5827
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,32768,0.4880
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,131072,0.7833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,2,0.4919
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,4,0.4904
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,16,0.4909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,8,0.4903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,32,0.4941
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,64,0.4959
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,128,0.4966
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,256,0.5028
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,512,0.5063
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,1024,0.5337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,2048,0.5525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,4096,0.5827
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,16384,0.6711
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,8192,0.6143
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,32768,0.7901
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,65536,1.0253
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,2,0.7680
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,131072,1.5748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,4,0.7658
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,8,0.7672
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,16,0.7674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,64,0.8475
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,32,0.8113
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,128,0.7742
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,256,0.7780
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,512,0.7909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,2048,0.8683
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,1024,0.8172
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,4096,0.9287
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,16384,1.0988
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,32768,1.3317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,8192,0.9862
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,65536,1.8365
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,4,1.3279
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,8,1.3309
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,2,1.3145
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,16,1.3253
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,64,1.3454
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,128,1.3939
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,32,1.3365
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,256,1.3508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,512,1.3817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,2048,1.5143
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,2,0.2149
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,4096,1.6332
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,1024,1.4362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,4,0.1972
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,8192,1.7392
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,16384,1.9611
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,8,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,32,0.1971
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,16,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,64,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,512,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,128,0.2165
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,256,0.1969
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,32768,2.3943
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,1024,0.2019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,2048,0.1992
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,4096,0.2305
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,8192,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,16384,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,65536,0.2425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,2,0.2222
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,4,0.2236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,131072,0.2634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,8,0.2093
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,32768,0.2290
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,16,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,32,0.2096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,64,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,128,0.2095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,256,0.2219
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,512,0.2287
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,1024,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,2048,0.2223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,4096,0.2265
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,8192,0.2343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,16384,0.2296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,32768,0.2298
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,65536,0.2637
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,131072,0.2820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,8,0.2242
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,4,0.2140
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,16,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,32,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,2,0.2125
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,64,0.2142
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,128,0.2202
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,256,0.2156
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,512,0.2186
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,1024,0.2245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,2048,0.2299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,4096,0.2434
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,8192,0.2387
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,16384,0.2381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,32768,0.2376
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,65536,0.2596
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,131072,0.2923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,2,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,4,0.2303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,8,0.2313
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,16,0.2244
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,32,0.2216
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,64,0.2269
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,128,0.2325
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,256,0.2230
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,512,0.2250
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,1024,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,2048,0.2343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,4096,0.2444
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,8192,0.2474
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,16384,0.2548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,32768,0.2564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,65536,0.2725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,131072,0.3142
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,2,0.2508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,8,0.2429
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,4,0.2508
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,16,0.2526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,32,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,64,0.2450
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,128,0.2498
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,256,0.2432
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,512,0.2448
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,1024,0.2431
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,2048,0.2608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,4096,0.2735
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,8192,0.2725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,16384,0.2691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,32768,0.2807
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,65536,0.3146
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,131072,0.3578
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,2,0.2421
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,4,0.2489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,8,0.2418
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,32,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,16,0.2428
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,64,0.2484
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,128,0.2450
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,256,0.2443
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,512,0.2505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,1024,0.2454
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,2048,0.2520
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,4096,0.2757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,8192,0.2836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,16384,0.2753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,32768,0.2976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,65536,0.3308
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,131072,0.4008
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,2,0.2583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,4,0.2741
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,16,0.2630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,8,0.2651
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,32,0.2590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,64,0.2590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,128,0.2591
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,256,0.2598
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,512,0.2602
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,1024,0.2695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,2048,0.2771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,4096,0.2909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,8192,0.3054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,16384,0.3060
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,32768,0.3305
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,65536,0.3896
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,131072,0.5017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,2,0.3156
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,4,0.3134
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,8,0.3325
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,16,0.3162
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,32,0.3138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,64,0.3122
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,128,0.3247
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,256,0.3128
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,512,0.3209
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,1024,0.3241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,2048,0.3409
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,4096,0.3648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,8192,0.3770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,32768,0.4357
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,16384,0.3890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,65536,0.5405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,131072,0.7319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,2,0.4121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,4,0.4151
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,8,0.4459
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,16,0.4111
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,32,0.4127
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,128,0.4147
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,64,0.4126
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,256,0.4182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,512,0.4277
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,1024,0.4422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,2048,0.4697
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,4096,0.4969
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,2,0.6198
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,4,0.7027
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,8,0.6217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,8192,0.5266
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,16384,0.5855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,32768,0.7034
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,16,0.6217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,65536,0.9398
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,131072,1.4931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,32,0.6270
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,64,0.6312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,128,0.6343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,256,0.6409
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,2048,0.7302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,512,0.6539
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,1024,0.6780
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,4096,0.7911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,8192,0.8466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,16384,0.9563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,32768,1.1845
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,2,1.3348
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,65536,1.6825
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,4,1.0671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,8,1.0674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,16,1.2236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,32,1.0699
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,64,1.2202
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,128,1.0823
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,256,1.1517
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,512,1.1219
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,2048,1.2634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,1024,1.1638
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,4096,1.3724
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,2,0.1804
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,16384,1.6938
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,4,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,8192,1.4777
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,8,0.1780
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,64,0.1796
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,128,0.1757
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,256,0.1906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,32,0.1883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,16,0.1853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,512,0.1769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,1024,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,2048,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,32768,2.1286
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,4096,0.1882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,8192,0.2096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,16384,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,32768,0.1943
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,65536,0.2280
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,131072,0.2504
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,2,0.1875
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,4,0.1829
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,8,0.1950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,16,0.1830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,32,0.1809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,128,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,256,0.1840
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,512,0.1925
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,2048,0.1853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,1024,0.1973
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,4096,0.2025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,64,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,8192,0.2038
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,16384,0.2103
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,32768,0.2207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,65536,0.2236
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,131072,0.2605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,2,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,4,0.2017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,8,0.1859
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,16,0.1887
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,32,0.2007
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,64,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,128,0.1982
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,256,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,512,0.1882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,1024,0.1862
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,2048,0.1950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,4096,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,8192,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,16384,0.2095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,32768,0.2299
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,65536,0.2512
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,131072,0.2736
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,2,0.1942
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,4,0.1961
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,8,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,16,0.1994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,32,0.2093
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,64,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,128,0.2023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,256,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,512,0.2093
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,1024,0.2040
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,2048,0.2186
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,4096,0.2227
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,8192,0.2249
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,16384,0.2383
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,32768,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,65536,0.2687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,131072,0.3064
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,2,0.2178
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,4,0.2302
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,16,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,32,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,8,0.2182
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,64,0.2174
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,128,0.2260
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,256,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,512,0.2296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,1024,0.2256
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,2048,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,4096,0.2545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,8192,0.2565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,16384,0.2551
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,32768,0.2692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,65536,0.2909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,131072,0.3489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,2,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,4,0.2229
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,8,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,16,0.2196
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,32,0.2276
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,64,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,128,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,256,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,512,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,1024,0.2368
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,2048,0.2390
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,4096,0.2583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,8192,0.2602
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,16384,0.2568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,32768,0.2814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,65536,0.3121
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,131072,0.3811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,2,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,4,0.2496
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,8,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,16,0.2342
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,64,0.2389
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,32,0.2386
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,128,0.2381
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,256,0.2364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,512,0.2385
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,2048,0.2502
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,4096,0.2692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,8192,0.2848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,1024,0.2411
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,16384,0.2877
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,32768,0.3132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,65536,0.3689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,131072,0.4748
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,2,0.2995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,4,0.3022
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,8,0.2820
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,16,0.2786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,32,0.2800
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,64,0.2811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,128,0.3201
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,256,0.2861
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,512,0.2852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,1024,0.3019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,2048,0.3083
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,4096,0.3272
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,8192,0.3448
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,16384,0.3581
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,32768,0.4059
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,65536,0.5069
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,2,0.3632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,4,0.3656
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,131072,0.7071
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,8,0.3641
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,16,0.3629
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,32,0.3641
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,64,0.3675
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,128,0.3674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,256,0.3685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,512,0.3944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,1024,0.4161
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,2048,0.4137
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,4096,0.4456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,16384,0.5329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,8192,0.4784
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,32768,0.6541
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,65536,0.8809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,2,0.5124
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,4,0.5134
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,8,0.5113
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,32,0.5124
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,16,0.5123
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,131072,1.4442
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,64,0.5169
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,128,0.5790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,256,0.5259
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,512,0.5393
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,2048,0.6110
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,4096,0.6744
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,1024,0.5663
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,16384,0.8432
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,8192,0.7304
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,32768,1.0745
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,2,0.8464
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,4,0.8505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,8,0.8532
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,65536,1.5829
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,16,0.9307
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,32,0.8616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,64,1.0059
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,128,0.8671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,256,0.8865
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,2048,1.0522
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,512,0.9721
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,4096,1.1613
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,16384,1.4800
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,1024,0.9543
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,8192,1.2709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,32768,1.9084
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,2,0.2321
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,4,0.2433
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,8,0.2422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,16,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,32,0.2420
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,64,0.2380
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,256,0.2327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,512,0.2432
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,128,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,1024,0.2324
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,2048,0.2361
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,4096,0.2590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,8192,0.2549
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,16384,0.2521
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,32768,0.2501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,65536,0.2812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,131072,0.2939
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,2,0.2530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,4,0.2486
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,8,0.2583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,32,0.2588
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,64,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,128,0.2524
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,256,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,512,0.2545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,16,0.2633
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,1024,0.2486
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,2048,0.2566
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,4096,0.2753
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,8192,0.2663
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,16384,0.2673
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,32768,0.2717
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,65536,0.2828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,131072,0.3127
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,2,0.2694
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,4,0.2594
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,8,0.2682
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,32,0.2653
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,16,0.2579
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,64,0.2637
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,128,0.2585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,256,0.2695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,512,0.2668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,1024,0.2684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,4096,0.2756
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,8192,0.2813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,2048,0.2596
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,16384,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,32768,0.2792
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,65536,0.3093
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,131072,0.3401
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,2,0.2725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,8,0.2781
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,4,0.2678
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,16,0.2705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,32,0.2730
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,64,0.2674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,128,0.2798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,256,0.2790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,512,0.2723
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,1024,0.2695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,2048,0.2693
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,4096,0.2989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,8192,0.2947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,16384,0.2835
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,32768,0.3032
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,65536,0.3249
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,2,0.2953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,131072,0.3532
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,4,0.2920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,8,0.2991
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,16,0.2945
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,32,0.2909
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,64,0.3033
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,128,0.2991
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,256,0.2908
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,512,0.2957
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,1024,0.2920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,2048,0.2996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,4096,0.3198
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,8192,0.3152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,32768,0.3262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,16384,0.3111
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,65536,0.3483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,131072,0.4022
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,2,0.3063
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,4,0.3143
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,8,0.3128
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,32,0.3064
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,64,0.3077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,128,0.3064
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,16,0.3142
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,256,0.3096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,1024,0.3144
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,512,0.3067
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,4096,0.3251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,2048,0.3150
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,8192,0.3309
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,16384,0.3319
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,32768,0.3493
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,65536,0.3923
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,2,0.3608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,131072,0.4516
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,4,0.3547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,8,0.3564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,16,0.3636
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,32,0.3562
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,64,0.3546
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,128,0.3605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,256,0.3615
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,512,0.3576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,1024,0.3539
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,4096,0.3756
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,2048,0.3649
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,8192,0.3867
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,16384,0.3903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,32768,0.4210
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,65536,0.4780
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,131072,0.5841
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,2,0.4472
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,4,0.4478
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,8,0.4431
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,16,0.4431
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,128,0.4466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,32,0.4421
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,64,0.4434
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,256,0.4416
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,512,0.4459
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,1024,0.4499
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,2048,0.4491
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,4096,0.4714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,16384,0.5071
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,32768,0.5461
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,8192,0.4861
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,65536,0.6463
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,2,0.6684
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,4,0.6653
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,131072,0.8416
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,8,0.6640
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,16,0.6662
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,32,0.6691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,64,0.6726
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,128,0.6741
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,256,0.6797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,512,0.6824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,1024,0.6857
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,2048,0.6881
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,4096,0.7170
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,16384,0.8053
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,8192,0.7431
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,65536,1.1518
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,32768,0.9276
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,2,1.1027
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,4,1.0991
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,131072,1.7045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,8,1.1012
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,16,1.1049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,32,1.1045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,64,1.1114
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,128,1.1168
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,256,1.1160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,512,1.1180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,2048,1.1382
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,1024,1.1235
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,4096,1.1938
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,16384,1.3654
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,32768,1.6005
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,8192,1.2447
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,2,1.8692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,4,1.8672
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,8,1.8764
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,65536,2.1200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,16,1.8796
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,32,1.8842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,64,1.8844
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,128,1.8790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,256,1.8832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,2048,1.9395
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,512,1.8836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,1024,1.9050
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,4096,2.0280
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,2,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,16384,2.3529
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,8192,2.1365
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,4,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,32768,2.7782
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,8,0.1999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,16,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,32,0.1941
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,64,0.2032
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,128,0.2063
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,256,0.1977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,512,0.2042
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,2048,0.2004
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,1024,0.1936
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,4096,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,8192,0.2286
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,16384,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,32768,0.2217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,131072,0.2664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,65536,0.2326
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,2,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,4,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,8,0.2254
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,16,0.2176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,32,0.2135
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,64,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,128,0.2066
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,256,0.2187
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,512,0.2070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,1024,0.2093
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,2048,0.2186
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,4096,0.2239
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,8192,0.2358
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,16384,0.2188
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,32768,0.2313
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,65536,0.2569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,131072,0.2721
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,2,0.2239
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,4,0.2230
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,8,0.2119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,16,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,32,0.2132
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,64,0.2178
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,128,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,512,0.2231
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,256,0.2227
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,1024,0.2154
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,2048,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,4096,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,8192,0.2337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,16384,0.2376
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,32768,0.2426
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,65536,0.2571
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,131072,0.2870
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,2,0.2310
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,4,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,8,0.2183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,16,0.2296
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,32,0.2291
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,64,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,128,0.2248
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,256,0.2217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,512,0.2197
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,1024,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,2048,0.2304
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,4096,0.2346
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,16384,0.2333
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,8192,0.2514
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,32768,0.2483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,65536,0.2673
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,131072,0.3162
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,2,0.2417
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,4,0.2428
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,8,0.2511
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,16,0.2422
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,32,0.2510
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,64,0.2469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,128,0.2528
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,256,0.2441
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,512,0.2439
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,1024,0.2413
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,2048,0.2526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,4096,0.2587
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,8192,0.2763
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,32768,0.2771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,16384,0.2589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,131072,0.3584
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,65536,0.3025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,2,0.2450
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,4,0.2526
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,8,0.2525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,16,0.2452
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,32,0.2484
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,128,0.2462
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,256,0.2523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,512,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,64,0.2469
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,1024,0.2540
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,2048,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,8192,0.2751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,4096,0.2709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,16384,0.2764
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,32768,0.2868
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,65536,0.3251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,131072,0.4033
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,2,0.2707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,8,0.2775
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,4,0.2735
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,16,0.2746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,32,0.2776
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,64,0.2716
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,128,0.2717
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,256,0.2789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,512,0.2786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,1024,0.2715
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,4096,0.2945
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,2048,0.2747
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,16384,0.3123
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,8192,0.3035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,32768,0.3338
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,65536,0.3978
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,131072,0.5118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,2,0.3162
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,4,0.3159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,8,0.3166
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,16,0.3162
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,32,0.3194
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,64,0.3160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,128,0.3203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,256,0.3209
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,512,0.3188
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,1024,0.3184
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,2048,0.3210
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,4096,0.3452
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,8192,0.3604
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,16384,0.3760
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,32768,0.4222
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,65536,0.5195
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,131072,0.7133
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,2,0.4312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,4,0.4341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,8,0.4318
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,16,0.4340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,64,0.4328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,128,0.4347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,32,0.4343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,256,0.4377
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,2048,0.4465
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,512,0.4347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,1024,0.4386
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,4096,0.4760
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,8192,0.4994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,16384,0.5564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,32768,0.6791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,65536,0.9102
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,2,0.6547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,4,0.6518
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,8,0.6551
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,32,0.6571
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,16,0.6567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,131072,1.4662
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,64,0.6664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,256,0.6714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,128,0.6709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,512,0.6735
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,2048,0.6946
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,4096,0.7453
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,8192,0.8004
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,1024,0.6797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,16384,0.9128
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,32768,1.1458
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,2,1.0650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,65536,1.6452
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,8,1.0687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,16,1.0727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,4,1.0681
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,32,1.0708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,64,1.0794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,128,1.0746
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,256,1.0811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,512,1.0824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,2048,1.1274
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,2,0.1829
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,1024,1.0989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,4,0.1813
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,4096,1.2108
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,16384,1.5418
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,8192,1.3217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,8,0.1835
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,32768,1.9708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,16,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,32,0.1796
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,64,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,128,0.1765
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,256,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,1024,0.1833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,512,0.1830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,2048,0.1801
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,4096,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,8192,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,16384,0.1889
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,32768,0.2066
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,65536,0.2110
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,131072,0.2486
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,2,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,4,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,8,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,16,0.1878
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,32,0.1921
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,64,0.2000
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,128,0.1890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,256,0.1875
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,1024,0.1854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,512,0.1810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,2048,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,4096,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,8192,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,16384,0.2080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,32768,0.2185
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,65536,0.2250
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,131072,0.2610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,2,0.1919
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,4,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,8,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,32,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,64,0.2010
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,128,0.1916
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,16,0.1899
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,256,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,512,0.1930
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,1024,0.2066
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,2048,0.1968
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,4096,0.2206
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,8192,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,16384,0.2126
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,32768,0.2308
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,65536,0.2412
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,131072,0.2729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,2,0.2103
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,4,0.2045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,8,0.2116
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,16,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,32,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,128,0.2101
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,256,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,512,0.2018
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,1024,0.2053
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,4096,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,64,0.2071
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,2048,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,8192,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,16384,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,32768,0.2337
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,65536,0.2569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,131072,0.2981
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,2,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,4,0.2355
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,8,0.2336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,16,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,32,0.2303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,64,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,128,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,256,0.2263
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,512,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,2048,0.2400
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,1024,0.2383
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,4096,0.2489
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,8192,0.2586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,16384,0.2532
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,32768,0.2696
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,65536,0.2911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,131072,0.3473
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,2,0.2374
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,4,0.2288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,8,0.2274
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,16,0.2315
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,32,0.2279
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,64,0.2346
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,128,0.2380
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,256,0.2377
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,512,0.2384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,1024,0.2342
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,2048,0.2363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,4096,0.2598
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,16384,0.2589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,8192,0.2588
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,32768,0.2814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,65536,0.3166
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,131072,0.3828
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,2,0.2587
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,4,0.2548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,8,0.2630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,16,0.2555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,32,0.2547
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,64,0.2590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,128,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,256,0.2582
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,512,0.2572
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,1024,0.2648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,2048,0.2651
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,4096,0.2778
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,8192,0.2896
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,16384,0.2980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,32768,0.3186
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,65536,0.3782
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,2,0.2958
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,131072,0.4962
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,4,0.3002
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,8,0.2965
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,16,0.2960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,32,0.3001
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,64,0.2961
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,256,0.2976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,128,0.3019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,512,0.3045
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,1024,0.3043
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,2048,0.3028
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,4096,0.3243
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,8192,0.3449
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,16384,0.3529
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,32768,0.4038
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,65536,0.5005
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,4,0.4051
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,2,0.4014
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,16,0.4049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,8,0.4025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,131072,0.6975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,32,0.4038
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,64,0.4075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,128,0.4049
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,256,0.4082
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,512,0.4062
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,2048,0.4179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,1024,0.4099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,4096,0.4434
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,8192,0.4745
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,16384,0.5316
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,32768,0.6545
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,65536,0.8827
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,2,0.5932
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,4,0.5925
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,8,0.5906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,16,0.5942
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,131072,1.4373
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,32,0.5947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,64,0.6002
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,128,0.6025
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,256,0.6048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,512,0.6083
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,2048,0.6261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,4096,0.6800
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,1024,0.6152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,8192,0.7345
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,16384,0.8483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,32768,1.0835
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,65536,1.5836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,2,0.9641
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,4,0.9581
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,8,0.9610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,16,0.9628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,32,0.9686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,64,0.9742
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,128,0.9712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,256,0.9744
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,512,0.9858
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,1024,0.9921
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,2048,1.0229
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,2,0.1668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,4096,1.1122
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,16384,1.4312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,8192,1.2162
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,4,0.1800
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,32768,1.8607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,8,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,16,0.1764
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,32,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,64,0.1686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,128,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,256,0.1778
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,512,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,1024,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,2048,0.1717
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,4096,0.1839
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,16384,0.1932
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,8192,0.1792
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,32768,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,65536,0.2023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,131072,0.2386
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,2,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,4,0.1894
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,8,0.1793
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,16,0.1808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,32,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,64,0.1730
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,128,0.1774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,256,0.1869
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,512,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,1024,0.1862
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,2048,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,4096,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,8192,0.1937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,16384,0.1952
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,32768,0.2046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,65536,0.2292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,131072,0.2468
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,2,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,4,0.1987
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,8,0.1818
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,16,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,32,0.1815
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,64,0.1937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,128,0.1863
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,256,0.1825
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,512,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,1024,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,2048,0.1920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,4096,0.2100
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,8192,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,16384,0.1948
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,32768,0.2095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,65536,0.2312
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,131072,0.2643
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,2,0.1999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,4,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,8,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,16,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,32,0.1915
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,64,0.1960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,128,0.1917
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,256,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,512,0.1905
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,1024,0.2037
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,2048,0.1930
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,4096,0.2173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,8192,0.2099
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,16384,0.2119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,32768,0.2328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,65536,0.2586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,131072,0.2880
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,2,0.2160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,4,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,8,0.2233
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,16,0.2218
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,32,0.2128
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,64,0.2160
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,128,0.2223
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,256,0.2119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,512,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,1024,0.2137
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,2048,0.2292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,4096,0.2436
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,8192,0.2424
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,16384,0.2494
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,32768,0.2548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,65536,0.2827
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,131072,0.3428
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,2,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,8,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,4,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,16,0.2162
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,32,0.2235
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,64,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,256,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,128,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,512,0.2192
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,1024,0.2276
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,2048,0.2329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,4096,0.2430
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,8192,0.2510
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,16384,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,32768,0.2694
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,65536,0.3080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,131072,0.3745
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,2,0.2428
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,4,0.2467
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,8,0.2384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,16,0.2383
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,32,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,64,0.2407
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,128,0.2402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,256,0.2435
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,512,0.2466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,1024,0.2523
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,2048,0.2492
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,4096,0.2669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,16384,0.2882
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,32768,0.3104
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,65536,0.3718
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,131072,0.4791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,8192,0.2787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,2,0.2799
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,4,0.2752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,8,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,16,0.2795
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,32,0.2750
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,64,0.2795
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,128,0.2822
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,256,0.2791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,512,0.2816
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,1024,0.2851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,2048,0.2921
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,4096,0.3101
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,8192,0.3262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,16384,0.3435
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,32768,0.3895
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,65536,0.4842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,131072,0.6780
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,2,0.3792
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,4,0.3766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,8,0.3766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,16,0.3765
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,32,0.3831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,64,0.3814
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,128,0.3802
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,256,0.3817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,512,0.3851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,1024,0.3840
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,4096,0.4217
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,8192,0.4475
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,16384,0.5079
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,32768,0.6264
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,2048,0.3911
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,65536,0.8583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,2,0.5447
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,4,0.5411
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,131072,1.4157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,16,0.5425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,32,0.5438
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,64,0.5483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,8,0.5446
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,128,0.5533
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,256,0.5563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,512,0.5559
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,1024,0.5618
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,2048,0.5749
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,4096,0.6298
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,16384,0.7976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,8192,0.6849
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,32768,1.0308
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,2,0.8538
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,4,0.8560
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,8,0.8546
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,65536,1.5363
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,16,0.8555
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,32,0.8600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,64,0.8601
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,128,0.8646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,256,0.8629
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,2048,0.9143
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,512,0.8727
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,2,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,1024,0.8904
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,16384,1.3293
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,4096,1.0020
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,4,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,8,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,8192,1.1098
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,16,0.1605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,32,0.1720
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,64,0.1634
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,128,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,256,0.1628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,512,0.1707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,1024,0.1607
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,32768,1.7596
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,2048,0.1725
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,4096,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,8192,0.1810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,16384,0.1701
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,32768,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,65536,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,131072,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,2,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,4,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,8,0.1704
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,16,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,32,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,64,0.1766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,128,0.1854
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,256,0.1712
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,512,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,1024,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,4096,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,8192,0.1871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,16384,0.1980
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,32768,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,65536,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,131072,0.2528
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,2048,0.1842
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,2,0.1741
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,4,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,8,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,16,0.1805
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,32,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,64,0.1810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,128,0.1794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,256,0.1751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,512,0.1905
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,1024,0.1855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,2048,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,4096,0.1920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,8192,0.1976
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,16384,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,32768,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,65536,0.2207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,131072,0.2729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,4,0.1868
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,8,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,2,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,16,0.1855
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,32,0.1897
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,64,0.1848
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,128,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,256,0.1951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,512,0.1853
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,1024,0.1879
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,2048,0.1972
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,4096,0.2119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,16384,0.2153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,32768,0.2163
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,65536,0.2425
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,131072,0.2817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,8192,0.2057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,2,0.2165
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,4,0.2183
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,8,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,16,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,32,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,64,0.2176
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,256,0.2058
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,128,0.2057
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,512,0.2196
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,1024,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,2048,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,4096,0.2287
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,8192,0.2440
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,16384,0.2419
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,32768,0.2467
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,65536,0.2876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,131072,0.3273
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,2,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,4,0.2095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,8,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,16,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,32,0.2087
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,64,0.2178
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,128,0.2191
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,256,0.2111
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,512,0.2137
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,1024,0.2239
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,2048,0.2190
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,4096,0.2402
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,8192,0.2551
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,16384,0.2459
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,32768,0.2616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,65536,0.3029
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,131072,0.3770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,2,0.2375
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,4,0.2394
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,8,0.2318
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,16,0.2347
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,32,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,64,0.2330
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,128,0.2333
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,256,0.2398
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,512,0.2442
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,1024,0.2483
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,2048,0.2447
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,4096,0.2618
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,8192,0.2742
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,16384,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,32768,0.3078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,65536,0.3687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,131072,0.4739
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,2,0.2648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,4,0.2677
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,8,0.2659
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,16,0.2692
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,32,0.2650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,64,0.2691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,128,0.2714
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,256,0.2691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,512,0.2752
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,2048,0.2816
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,1024,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,4096,0.3008
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,8192,0.3219
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,16384,0.3367
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,32768,0.3810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,65536,0.4774
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,2,0.3624
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,131072,0.6744
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,4,0.3632
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,8,0.3622
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,16,0.3652
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,32,0.3670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,64,0.3687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,128,0.3661
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,256,0.3676
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,512,0.3695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,1024,0.3728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,4096,0.4103
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,16384,0.4951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,8192,0.4362
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,2048,0.3809
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,32768,0.6152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,65536,0.8436
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,4,0.5189
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,2,0.5199
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,8,0.5175
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,16,0.5209
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,32,0.5195
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,131072,1.4046
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,64,0.5225
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,128,0.5288
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,256,0.5298
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,512,0.5329
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,2048,0.5551
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,1024,0.5390
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,16384,0.7719
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,4096,0.6051
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,8192,0.6591
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,32768,1.0074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,2,0.8067
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,65536,1.5130
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,4,0.8076
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,8,0.8033
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,16,0.8054
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,32,0.8062
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,64,0.8117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,128,0.8111
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,256,0.8156
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,512,0.8221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,2,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,16384,1.2801
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,2048,0.8609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,1024,0.8379
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,4096,0.9475
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,4,0.1685
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,8192,1.0628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,8,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,16,0.1564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,32,0.1686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,64,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,128,0.1696
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,256,0.1707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,512,0.1630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,32768,1.7116
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,1024,0.1577
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,2048,0.1674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,4096,0.1687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,8192,0.1874
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,16384,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,32768,0.1869
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,65536,0.1999
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,4,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,131072,0.2165
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,2,0.1688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,8,0.1668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,16,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,32,0.1668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,64,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,128,0.1801
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,256,0.1664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,512,0.1733
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,1024,0.1833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,2048,0.1669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,4096,0.1824
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,8192,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,16384,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,32768,0.2002
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,65536,0.2032
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,2,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,131072,0.2271
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,4,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,8,0.1731
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,16,0.1793
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,32,0.1871
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,64,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,128,0.1721
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,256,0.1767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,512,0.1890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,1024,0.1787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,2048,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,4096,0.1997
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,8192,0.2000
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,16384,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,32768,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,65536,0.2181
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,131072,0.2680
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,2,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,4,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,8,0.1890
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,16,0.1873
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,32,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,64,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,128,0.1947
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,256,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,512,0.1819
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,2048,0.1974
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,4096,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,8192,0.2095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,16384,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,32768,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,1024,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,65536,0.2409
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,131072,0.2803
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,2,0.2162
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,4,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,8,0.2154
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,16,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,32,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,64,0.2033
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,128,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,256,0.2037
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,512,0.2178
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,1024,0.2067
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,2048,0.2096
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,4096,0.2311
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,8192,0.2345
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,16384,0.2408
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,32768,0.2568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,65536,0.2735
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,131072,0.3237
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,2,0.2059
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,8,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,16,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,32,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,4,0.2145
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,64,0.2137
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,128,0.2061
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,256,0.2080
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,512,0.2073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,1024,0.2119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,4096,0.2354
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,8192,0.2501
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,16384,0.2500
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,32768,0.2569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,65536,0.3062
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,2048,0.2141
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,131072,0.3660
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,2,0.2303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,4,0.2336
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,8,0.2344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,16,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,32,0.2303
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,64,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,128,0.2361
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,256,0.2304
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,512,0.2341
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,1024,0.2451
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,2048,0.2416
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,4096,0.2600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,16384,0.2797
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,8192,0.2697
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,32768,0.3062
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,65536,0.3581
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,131072,0.4731
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,2,0.2663
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,4,0.2609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,8,0.2610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,16,0.2600
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,32,0.2633
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,64,0.2648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,128,0.2618
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,256,0.2630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,512,0.2742
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,1024,0.2743
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,2048,0.2762
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,4096,0.3032
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,16384,0.3281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,8192,0.3135
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,32768,0.3767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,65536,0.4751
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,2,0.3546
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,8,0.3605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,4,0.3565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,16,0.3567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,131072,0.6765
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,32,0.3574
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,64,0.3605
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,128,0.3585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,256,0.3610
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,512,0.3630
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,1024,0.3707
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,2048,0.3742
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,4096,0.4051
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,8192,0.4291
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,16384,0.4906
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,32768,0.6094
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,65536,0.8419
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,4,0.5063
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,131072,1.3993
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,8,0.5060
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,2,0.5069
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,16,0.5073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,64,0.5104
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,128,0.5161
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,32,0.5087
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,256,0.5179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,512,0.5209
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,2048,0.5417
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,1024,0.5284
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,16384,0.7601
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,4096,0.5937
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,8192,0.6455
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,32768,0.9912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,2,0.7816
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,4,0.7807
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,8,0.7798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,65536,1.5077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,16,0.7815
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,32,0.7836
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,64,0.7843
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,128,0.7868
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,256,0.7903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,512,0.7990
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,2048,0.8405
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,1024,0.8109
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,4096,0.9275
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,16384,1.2534
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,2,0.1666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,4,0.1563
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,8,0.1671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,8192,1.0358
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,16,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,32,0.1664
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,64,0.1564
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,128,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,256,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,32768,1.6786
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,512,0.1674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,1024,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,2048,0.1669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,4096,0.1665
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,8192,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,16384,0.1687
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,32768,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,65536,0.1883
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,131072,0.2292
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,2,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,4,0.1798
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,8,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,16,0.1793
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,32,0.1608
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,64,0.1666
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,128,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,256,0.1688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,512,0.1674
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,1024,0.1671
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,2048,0.1794
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,4096,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,8192,0.1771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,16384,0.1920
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,32768,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,65536,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,131072,0.2466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,2,0.1694
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,8,0.1720
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,16,0.1691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,32,0.1771
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,64,0.1822
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,128,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,256,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,512,0.1705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,1024,0.1695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,4,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,2048,0.1705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,4096,0.1856
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,8192,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,16384,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,32768,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,65536,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,131072,0.2556
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,2,0.1839
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,4,0.1868
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,8,0.1952
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,16,0.1949
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,32,0.1876
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,64,0.1972
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,128,0.1831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,256,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,512,0.1833
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,1024,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,2048,0.1938
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,4096,0.2076
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,16384,0.1989
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,32768,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,65536,0.2456
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,8192,0.2107
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,131072,0.2835
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,2,0.2032
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,4,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,16,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,8,0.2040
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,32,0.2150
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,64,0.2088
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,128,0.2144
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,256,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,512,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,1024,0.2048
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,2048,0.2075
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,4096,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,8192,0.2371
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,16384,0.2326
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,32768,0.2553
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,65536,0.2745
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,131072,0.3221
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,2,0.2053
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,4,0.2092
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,8,0.2033
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,16,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,32,0.2136
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,64,0.2146
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,128,0.2037
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,256,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,512,0.2123
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,1024,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,2048,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,4096,0.2258
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,8192,0.2466
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,16384,0.2506
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,32768,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,65536,0.2951
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,131072,0.3688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,2,0.2317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,4,0.2242
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,8,0.2259
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,16,0.2317
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,32,0.2334
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,64,0.2262
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,128,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,256,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,512,0.2358
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,1024,0.2325
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,4096,0.2558
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,16384,0.2713
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,8192,0.2686
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,2048,0.2450
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,32768,0.3031
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,65536,0.3568
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,131072,0.4670
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,4,0.2628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,2,0.2587
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,16,0.2609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,32,0.2647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,64,0.2626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,8,0.2569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,128,0.2589
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,256,0.2590
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,512,0.2689
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,2048,0.2766
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,1024,0.2719
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,4096,0.3021
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,8192,0.3166
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,16384,0.3252
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,32768,0.3719
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,65536,0.4708
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,2,0.3530
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,4,0.3553
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,16,0.3531
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,131072,0.6784
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,32,0.3543
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,64,0.3532
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,8,0.3560
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,128,0.3559
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,256,0.3583
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,512,0.3626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,1024,0.3647
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,2048,0.3755
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,4096,0.3977
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,16384,0.4850
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,8192,0.4257
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,32768,0.6073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,65536,0.8384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,2,0.4994
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,4,0.5009
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,8,0.5009
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,16,0.4997
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,32,0.5019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,131072,1.3946
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,64,0.5062
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,128,0.5095
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,256,0.5112
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,2048,0.5356
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,512,0.5152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,4096,0.5857
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,1024,0.5233
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,16384,0.7525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,8192,0.6454
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,32768,0.9903
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,65536,1.4866
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,2,0.7683
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,4,0.7678
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,8,0.7682
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,16,0.7673
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,32,0.7732
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,64,0.7740
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,128,0.7745
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,256,0.7755
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,512,0.7872
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,2048,0.8266
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,4096,0.9191
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,1024,0.8003
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,2,0.1514
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,8192,1.0173
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,4,0.1521
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,32768,1.6718
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,8,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,16384,1.2399
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,16,0.1502
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,32,0.1648
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,64,0.1503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,128,0.1628
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,256,0.1586
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,512,0.1588
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,1024,0.1562
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,2048,0.1505
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,4096,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,8192,0.1688
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,16384,0.1768
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,32768,0.1808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,65536,0.1929
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,131072,0.2120
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,2,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,4,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,8,0.1772
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,16,0.1641
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,32,0.1767
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,64,0.1768
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,128,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,256,0.1705
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,512,0.1773
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,1024,0.1626
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,2048,0.1668
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,4096,0.1760
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,8192,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,16384,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,32768,0.1931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,65536,0.1986
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,131072,0.2327
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,2,0.1792
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,4,0.1695
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,8,0.1697
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,16,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,32,0.1669
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,64,0.1691
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,128,0.1816
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,256,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,512,0.1808
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,1024,0.1718
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,2048,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,4096,0.1791
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,8192,0.1845
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,16384,0.1830
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,32768,0.2030
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,65536,0.2207
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,2,0.1770
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,131072,0.2539
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,4,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,8,0.1807
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,16,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,32,0.1810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,64,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,128,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,256,0.1787
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,512,0.1769
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,1024,0.1801
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,2048,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,4096,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,8192,0.1981
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,16384,0.2034
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,32768,0.2149
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,65536,0.2332
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,131072,0.2738
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,2,0.2040
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,4,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,8,0.2020
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,16,0.2137
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,32,0.2152
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,64,0.2115
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,128,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,256,0.2012
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,512,0.2023
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,1024,0.2153
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,2048,0.2067
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,4096,0.2226
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,8192,0.2350
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,16384,0.2364
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,32768,0.2432
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,65536,0.2722
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,131072,0.3343
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,2,0.2017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,4,0.2038
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,8,0.2120
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,16,0.2017
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,32,0.2066
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,64,0.2129
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,128,0.2019
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,256,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,512,0.2038
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,1024,0.2073
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,2048,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,4096,0.2357
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,8192,0.2411
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,32768,0.2627
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,65536,0.2931
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,131072,0.3631
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,16384,0.2486
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,2,0.2222
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,4,0.2235
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,8,0.2298
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,16,0.2245
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,32,0.2225
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,64,0.2251
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,128,0.2307
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,256,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,512,0.2344
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,1024,0.2325
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,2048,0.2353
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,4096,0.2588
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,8192,0.2659
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,16384,0.2764
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,32768,0.2985
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,65536,0.3536
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,131072,0.4654
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,2,0.2603
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,4,0.2548
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,8,0.2611
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,16,0.2549
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,32,0.2569
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,128,0.2551
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,64,0.2611
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,256,0.2625
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,512,0.2609
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,1024,0.2729
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,2048,0.2719
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,4096,0.2944
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,16384,0.3238
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,32768,0.3747
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,65536,0.4793
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,8192,0.3102
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,2,0.3497
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,131072,0.6616
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,4,0.3495
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,8,0.3525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,16,0.3503
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,32,0.3552
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,64,0.3525
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,128,0.3580
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,256,0.3561
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,512,0.3576
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,2048,0.3738
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,1024,0.3613
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,4096,0.3950
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,8192,0.4277
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,16384,0.4810
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,65536,0.8384
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,32768,0.6013
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,2,0.4969
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,4,0.4963
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,8,0.4949
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,16,0.4990
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,131072,1.4002
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,32,0.4967
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,64,0.5006
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,128,0.5069
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,256,0.5070
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,512,0.5108
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,2048,0.5328
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,1024,0.5193
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,4096,0.5831
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,8192,0.6393
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,16384,0.7519
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,32768,0.9817
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,4,0.7645
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,2,0.7645
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,65536,1.4888
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,16,0.7650
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,8,0.7629
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,32,0.7644
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,64,0.7659
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,128,0.7677
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,256,0.7737
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,512,0.7806
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,2048,0.8215
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,4096,0.9119
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,16384,1.2306
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,1024,0.7960
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,8192,1.0164
VLLM,0.17.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,32768,1.6616
