framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,0,0.7631
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,4,1,0,0.7567
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,8,1,0,0.7427
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,16,1,0,0.7544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,32,1,0,0.7563
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,64,1,0,0.7356
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,128,1,0,0.7323
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,256,1,0,0.7501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,512,1,0,0.8008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1024,1,0,0.9205
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,2048,1,0,1.3808
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,4096,1,0,6.5903
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,8192,1,0,13.8751
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,16384,1,0,29.8491
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,0,0.7510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,4,1,0,0.7462
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,8,1,0,0.7435
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,16,1,0,0.7320
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,32,1,0,0.7177
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,64,1,0,0.7364
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,128,1,0,0.7467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,256,1,0,0.7948
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,512,1,0,0.8940
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1024,1,0,1.2767
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,2048,1,0,2.0999
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,4096,1,0,13.7285
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,8192,1,0,28.1448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,16384,1,0,59.4217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,0,0.7419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,4,1,0,0.7280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,8,1,0,0.7356
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,16,1,0,0.7333
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,32,1,0,0.7348
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,64,1,0,0.7558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,128,1,0,0.8099
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,256,1,0,0.8949
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,512,1,0,1.1883
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1024,1,0,1.8254
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,2048,1,0,4.1211
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,4096,1,0,26.8127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,8192,1,0,56.4224
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,0,0.7731
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,4,1,0,0.7740
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,8,1,0,0.7741
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,16,1,0,0.7730
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,32,1,0,0.7553
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,64,1,0,0.7558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,128,1,0,0.7860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,256,1,0,0.7647
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,512,1,0,0.7904
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1024,1,0,0.8447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,2048,1,0,1.0166
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,4096,1,0,4.0868
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,8192,1,0,7.7665
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,16384,1,0,17.8207
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,0,0.7533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,4,1,0,0.7600
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,8,1,0,0.7662
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,16,1,0,0.7566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,32,1,0,0.7581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,64,1,0,0.7531
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,128,1,0,0.7479
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,256,1,0,0.7860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,512,1,0,0.8289
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1024,1,0,0.9489
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,2048,1,0,1.3204
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,4096,1,0,7.4904
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,8192,1,0,15.7861
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,16384,1,0,35.1041
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,0,0.7685
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,4,1,0,0.7708
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,8,1,0,0.7547
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,16,1,0,0.7626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,32,1,0,0.7643
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,64,1,0,0.7681
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,128,1,0,0.7709
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,256,1,0,0.8263
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,512,1,0,0.9203
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1024,1,0,1.1901
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,2048,1,0,2.1354
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,4096,1,0,14.9582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,8192,1,0,31.9305
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,16384,1,0,71.8201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,0,0.7530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,4,1,0,0.7590
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,8,1,0,0.7530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,16,1,0,0.7508
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,32,1,0,0.7579
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,64,1,0,0.7780
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,128,1,0,0.8326
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,256,1,0,0.9177
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,512,1,0,1.1135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1024,1,0,1.8725
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,2048,1,0,4.0498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,4096,1,0,29.7658
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,8192,1,0,63.9902
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,0,0.7570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,4,1,0,0.7504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,8,1,0,0.7548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,16,1,0,0.7538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,32,1,0,0.7866
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,64,1,0,0.8352
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,128,1,0,0.9248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,256,1,0,1.0924
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,512,1,0,1.7412
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1024,1,0,3.6292
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,2048,1,0,8.7308
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,4096,1,0,59.6712
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,0,0.7546
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,4,1,0,0.7595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,8,1,0,0.7713
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,16,1,0,0.7829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,32,1,0,0.8269
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,64,1,0,0.9090
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,128,1,0,1.0819
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,256,1,0,1.6736
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,512,1,0,3.4069
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1024,1,0,7.8389
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,2048,1,0,17.8449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,0,0.7725
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,4,1,0,0.7771
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,8,1,0,0.7664
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,16,1,0,0.7598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,32,1,0,0.7575
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,64,1,0,0.7607
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,128,1,0,0.7639
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,256,1,0,0.7592
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,512,1,0,0.7638
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1024,1,0,0.7946
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,2048,1,0,0.8514
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,4096,1,0,3.8321
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,8192,1,0,7.2265
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,16384,1,0,16.4149
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,0,0.7599
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,4,1,0,0.7629
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,8,1,0,0.7616
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,16,1,0,0.7619
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,32,1,0,0.7561
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,64,1,0,0.7529
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,128,1,0,0.7641
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,256,1,0,0.7499
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,512,1,0,0.7811
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1024,1,0,0.8352
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,2048,1,0,0.9824
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,4096,1,0,6.8438
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,8192,1,0,14.6315
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,16384,1,0,32.2975
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,0,0.7524
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,4,1,0,0.7516
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,8,1,0,0.7379
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,16,1,0,0.7600
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,32,1,0,0.7475
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,64,1,0,0.7671
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,128,1,0,0.7639
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,256,1,0,0.7833
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,512,1,0,0.8395
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1024,1,0,0.9225
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,2048,1,0,1.3294
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,4096,1,0,13.6100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,8192,1,0,29.0930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,16384,1,0,66.9515
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,0,0.7601
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,4,1,0,0.7640
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,8,1,0,0.7683
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,16,1,0,0.7558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,32,1,0,0.7524
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,64,1,0,0.7544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,128,1,0,0.7775
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,256,1,0,0.8331
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,512,1,0,0.8877
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1024,1,0,1.1992
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,2048,1,0,2.2350
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,4096,1,0,26.8302
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,8192,1,0,62.2976
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,0,0.7508
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,4,1,0,0.7453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,8,1,0,0.7670
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,16,1,0,0.7566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,32,1,0,0.7611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,64,1,0,0.7790
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,128,1,0,0.8229
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,256,1,0,0.8933
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,512,1,0,1.1404
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1024,1,0,1.9731
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,2048,1,0,4.5216
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,4096,1,0,54.1971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,0,0.7466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,4,1,0,0.7472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,8,1,0,0.7570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,16,1,0,0.7758
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,32,1,0,0.7945
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,64,1,0,0.8439
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,128,1,0,0.8896
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,256,1,0,1.0990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,512,1,0,1.8426
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1024,1,0,3.9487
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,2048,1,0,9.4904
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,0,0.7578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,4,1,0,0.7544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,8,1,0,0.7536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,16,1,0,0.7815
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,32,1,0,0.8155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,64,1,0,0.8947
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,128,1,0,1.0916
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,256,1,0,1.7724
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,512,1,0,3.6545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1024,1,0,8.2281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,2048,1,0,18.9254
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,0,0.7744
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,4,1,0,0.7983
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,8,1,0,0.8360
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,16,1,0,0.8726
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,32,1,0,0.9440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,64,1,0,1.1434
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,128,1,0,1.7481
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,256,1,0,3.4276
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,512,1,0,7.6518
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1024,1,0,16.7781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,0,0.8971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,4,1,0,0.9438
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,8,1,0,0.9947
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,16,1,0,1.0505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,32,1,0,1.2712
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,64,1,0,1.8556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,128,1,0,3.3319
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,256,1,0,7.2514
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,512,1,0,15.5013
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,0,1.1371
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,4,1,0,1.2514
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,8,1,0,1.3085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,16,1,0,1.4981
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,32,1,0,2.0900
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,64,1,0,3.6266
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,128,1,0,7.0688
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,256,1,0,14.9088
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,0,1.6310
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,4,1,0,1.7795
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,8,1,0,1.9670
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,16,1,0,2.5505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,32,1,0,4.1761
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,64,1,0,7.5944
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,128,1,0,14.6577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,0,0.7763
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,4,1,0,0.7649
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,8,1,0,0.7635
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,16,1,0,0.7682
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,32,1,0,0.7715
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,64,1,0,0.7626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,128,1,0,0.7577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,256,1,0,0.7651
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,512,1,0,0.7728
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1024,1,0,0.7652
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,2048,1,0,0.7867
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,4096,1,0,3.7174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,8192,1,0,6.4698
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,16384,1,0,15.0407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,0,0.7719
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,4,1,0,0.7613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,8,1,0,0.7579
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,16,1,0,0.7556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,32,1,0,0.7482
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,64,1,0,0.7539
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,128,1,0,0.7452
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,256,1,0,0.7557
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,512,1,0,0.7579
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1024,1,0,0.7811
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,2048,1,0,0.8195
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,4096,1,0,6.1105
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,8192,1,0,13.3448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,16384,1,0,29.4072
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,0,0.7529
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,4,1,0,0.7508
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,8,1,0,0.7465
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,16,1,0,0.7474
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,32,1,0,0.7497
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,64,1,0,0.7465
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,128,1,0,0.7499
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,256,1,0,0.7547
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,512,1,0,0.7763
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1024,1,0,0.8157
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,2048,1,0,0.9858
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,4096,1,0,12.1530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,8192,1,0,26.4326
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,16384,1,0,59.9814
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,0,0.7471
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,4,1,0,0.7518
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,8,1,0,0.7533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,16,1,0,0.7545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,32,1,0,0.7563
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,64,1,0,1.1962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,128,1,0,0.7668
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,256,1,0,0.7897
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,512,1,0,0.8144
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1024,1,0,0.9296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,2048,1,0,1.3483
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,4096,1,0,24.3003
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,8192,1,0,56.1041
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,0,0.7542
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,4,1,0,0.7501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,8,1,0,0.7533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,16,1,0,0.7572
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,32,1,0,0.7575
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,64,1,0,0.7584
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,128,1,0,0.7888
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,256,1,0,0.8306
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,512,1,0,0.9088
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1024,1,0,1.2263
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,2048,1,0,2.4615
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,4096,1,0,48.2139
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,0,0.7606
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,4,1,0,0.7614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,8,1,0,0.7684
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,16,1,0,0.7664
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,32,1,0,0.7697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,64,1,0,0.7896
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,128,1,0,0.8141
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,256,1,0,0.9046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,512,1,0,1.1502
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1024,1,0,2.2217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,2048,1,0,5.1352
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,0,0.7530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,4,1,0,0.7474
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,8,1,0,0.7634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,16,1,0,0.7617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,32,1,0,0.7796
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,64,1,0,0.8124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,128,1,0,0.9058
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,256,1,0,1.1198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,512,1,0,2.0816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1024,1,0,4.4781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,2048,1,0,10.6539
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,0,0.7581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,4,1,0,0.7567
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,8,1,0,0.7677
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,16,1,0,0.7848
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,32,1,0,0.8176
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,64,1,0,0.8913
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,128,1,0,1.0990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,256,1,0,2.0083
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,512,1,0,4.1653
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1024,1,0,9.5255
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,0,0.7986
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,4,1,0,0.8021
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,8,1,0,0.8265
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,16,1,0,0.8546
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,32,1,0,0.9620
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,64,1,0,1.1517
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,128,1,0,1.9935
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,256,1,0,4.0259
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,512,1,0,8.8828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,0,0.9497
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,4,1,0,0.9727
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,8,1,0,1.0023
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,16,1,0,1.0908
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,32,1,0,1.2895
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,64,1,0,2.1075
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,128,1,0,3.9256
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,256,1,0,8.4391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,0,1.1684
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,4,1,0,1.2404
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,8,1,0,1.3370
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,16,1,0,1.5270
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,32,1,0,2.3367
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,64,1,0,4.2960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,128,1,0,8.3872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,0,0.7585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,4,1,0,0.7585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,8,1,0,0.7596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,16,1,0,0.7585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,32,1,0,0.7487
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,64,1,0,0.7550
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,128,1,0,0.7475
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,256,1,0,0.7509
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,512,1,0,0.7698
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1024,1,0,0.7538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,2048,1,0,0.7761
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,4096,1,0,3.4016
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,8192,1,0,6.2472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,16384,1,0,14.1583
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,0,0.8120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,4,1,0,0.8079
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,8,1,0,0.8107
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,16,1,0,0.7834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,32,1,0,0.7466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,64,1,0,0.7596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,128,1,0,0.7476
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,256,1,0,0.7653
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,512,1,0,0.7626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1024,1,0,0.7698
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,2048,1,0,0.7796
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,4096,1,0,5.9142
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,8192,1,0,12.5169
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,16384,1,0,27.9660
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,0,0.7547
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,4,1,0,0.7617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,8,1,0,0.7496
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,16,1,0,0.7513
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,32,1,0,0.7514
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,64,1,0,0.7551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,128,1,0,0.7686
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,256,1,0,0.7669
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,512,1,0,0.7604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1024,1,0,0.7880
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,2048,1,0,0.8359
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,4096,1,0,11.5827
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,8192,1,0,24.8173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,16384,1,0,60.0755
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,0,0.7543
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,4,1,0,0.7562
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,8,1,0,0.7548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,16,1,0,0.7479
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,32,1,0,0.7581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,64,1,0,0.7457
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,128,1,0,0.7615
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,256,1,0,0.7705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,512,1,0,0.7720
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1024,1,0,0.8277
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,2048,1,0,1.0087
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,4096,1,0,22.9610
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,8192,1,0,52.3530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,0,0.7468
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,4,1,0,0.7546
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,8,1,0,0.7509
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,16,1,0,0.7542
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,32,1,0,0.7671
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,64,1,0,0.7581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,128,1,0,0.7511
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,256,1,0,0.7751
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,512,1,0,0.8262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1024,1,0,0.9616
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,2048,1,0,1.5638
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,4096,1,0,45.6979
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,0,0.7538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,4,1,0,0.7503
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,8,1,0,0.7508
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,16,1,0,0.7552
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,32,1,0,0.7537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,64,1,0,0.7566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,128,1,0,0.7696
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,256,1,0,0.8335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,512,1,0,0.9140
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1024,1,0,1.4329
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,2048,1,0,3.0439
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,0,0.7523
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,4,1,0,0.7507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,8,1,0,0.7527
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,16,1,0,0.7737
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,32,1,0,0.7524
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,64,1,0,0.7697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,128,1,0,0.8193
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,256,1,0,0.9106
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,512,1,0,1.3720
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1024,1,0,2.7769
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,2048,1,0,6.2548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,0,0.7469
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,4,1,0,0.7465
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,8,1,0,0.7463
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,16,1,0,0.7535
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,32,1,0,0.7792
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,64,1,0,0.8152
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,128,1,0,0.9206
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,256,1,0,1.3420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,512,1,0,2.6649
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1024,1,0,5.6988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,0,0.7535
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,4,1,0,0.7464
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,8,1,0,0.7681
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,16,1,0,0.7715
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,32,1,0,0.8200
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,64,1,0,0.9201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,128,1,0,1.3302
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,256,1,0,2.5847
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,512,1,0,5.3653
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,0,0.7784
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,4,1,0,0.8092
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,8,1,0,0.8227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,16,1,0,0.8780
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,32,1,0,0.9892
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,64,1,0,1.3834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,128,1,0,2.5618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,256,1,0,5.2140
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,0,0.9046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,4,1,0,0.9656
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,8,1,0,1.0052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,16,1,0,1.1023
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,32,1,0,1.5013
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,64,1,0,2.6580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,128,1,0,5.1375
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,0,0.8299
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,4,1,0,0.8488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,8,1,0,0.8472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,16,1,0,0.8638
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,32,1,0,0.8559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,64,1,0,0.8486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,128,1,0,0.8542
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,256,1,0,0.8449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,512,1,0,0.9040
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1024,1,0,1.0201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,2048,1,0,1.4792
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,4096,1,0,8.1638
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,8192,1,0,17.0599
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,16384,1,0,35.1247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,0,0.8638
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,4,1,0,0.8746
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,8,1,0,0.8656
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,16,1,0,0.8424
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,32,1,0,0.8642
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,64,1,0,0.8642
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,128,1,0,0.8816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,256,1,0,0.8960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,512,1,0,1.0176
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1024,1,0,1.3466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,2048,1,0,2.2075
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,4096,1,0,16.0593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,8192,1,0,33.5967
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,16384,1,0,71.4330
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,0,0.8364
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,4,1,0,0.8215
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,8,1,0,0.8305
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,16,1,0,0.8224
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,32,1,0,0.8288
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,64,1,0,0.8378
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,128,1,0,0.8993
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,256,1,0,0.9852
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,512,1,0,1.2773
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1024,1,0,1.9276
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,2048,1,0,4.1324
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,4096,1,0,32.1804
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,8192,1,0,69.0985
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,0,0.8459
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,4,1,0,0.8636
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,8,1,0,0.8567
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,16,1,0,0.8638
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,32,1,0,0.8690
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,64,1,0,0.8670
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,128,1,0,0.8614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,256,1,0,0.8693
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,512,1,0,0.8814
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1024,1,0,0.9425
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,2048,1,0,1.1178
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,4096,1,0,5.1115
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,8192,1,0,10.1362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,16384,1,0,21.5852
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,0,1.6478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,4,1,0,1.6227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,8,1,0,1.5593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,16,1,0,1.6311
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,32,1,0,1.6498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,64,1,0,1.3985
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,128,1,0,0.8703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,256,1,0,0.8841
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,512,1,0,0.9413
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1024,1,0,1.0613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,2048,1,0,1.4138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,4096,1,0,9.0910
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,8192,1,0,19.7734
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,16384,1,0,45.2752
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,0,0.8663
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,4,1,0,0.8655
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,8,1,0,0.8775
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,16,1,0,0.9021
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,32,1,0,0.8709
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,64,1,0,0.8721
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,128,1,0,0.8933
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,256,1,0,0.9454
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,512,1,0,1.0568
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1024,1,0,1.3088
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,2048,1,0,2.2349
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,4096,1,0,18.6909
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,8192,1,0,39.9738
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,16384,1,0,93.1663
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,0,0.8696
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,4,1,0,0.8661
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,8,1,0,0.8740
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,16,1,0,0.8678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,32,1,0,0.8616
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,64,1,0,0.8940
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,128,1,0,0.9348
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,256,1,0,1.0304
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,512,1,0,1.2280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1024,1,0,1.9819
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,2048,1,0,4.4018
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,4096,1,0,37.4475
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,8192,1,0,85.4008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,0,0.8839
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,4,1,0,0.8719
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,8,1,0,0.8740
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,16,1,0,0.8661
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,32,1,0,0.8890
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,64,1,0,0.9353
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,128,1,0,1.0391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,256,1,0,1.2001
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,512,1,0,1.8461
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1024,1,0,3.6943
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,2048,1,0,9.1142
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,4096,1,0,82.4001
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,0,0.8794
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,4,1,0,0.8807
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,8,1,0,0.8753
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,16,1,0,0.8949
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,32,1,0,0.9445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,64,1,0,1.0431
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,128,1,0,1.1857
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,256,1,0,1.7804
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,512,1,0,3.4975
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1024,1,0,7.8530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,2048,1,0,17.9781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,0,0.8449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,4,1,0,0.8566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,8,1,0,0.8603
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,16,1,0,0.8608
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,32,1,0,0.8630
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,64,1,0,0.8531
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,128,1,0,0.8603
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,256,1,0,0.8563
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,512,1,0,0.8586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1024,1,0,0.8755
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,2048,1,0,0.9285
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,4096,1,0,4.8332
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,8192,1,0,9.2445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,16384,1,0,20.0478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,0,0.8505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,4,1,0,0.8468
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,8,1,0,0.8515
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,16,1,0,0.8474
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,32,1,0,0.8460
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,64,1,0,0.8500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,128,1,0,0.8437
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,256,1,0,0.8432
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,512,1,0,0.8687
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1024,1,0,0.9201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,2048,1,0,1.0626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,4096,1,0,8.6970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,8192,1,0,18.5813
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,16384,1,0,41.0730
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,0,0.8345
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,4,1,0,0.8442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,8,1,0,0.8427
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,16,1,0,0.8783
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,32,1,0,0.8489
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,64,1,0,0.8452
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,128,1,0,0.8476
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,256,1,0,0.8718
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,512,1,0,0.9203
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1024,1,0,1.0105
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,2048,1,0,1.4144
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,4096,1,0,17.4193
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,8192,1,0,37.3199
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,16384,1,0,86.2968
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,0,0.8596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,4,1,0,0.8433
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,8,1,0,0.8451
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,16,1,0,0.8575
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,32,1,0,0.8457
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,64,1,0,0.8620
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,128,1,0,0.8774
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,256,1,0,0.9346
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,512,1,0,0.9915
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1024,1,0,1.3081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,2048,1,0,2.3002
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,4096,1,0,34.9292
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,8192,1,0,80.6135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,0,0.8530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,4,1,0,0.8491
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,8,1,0,0.8448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,16,1,0,0.8364
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,32,1,0,0.8634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,64,1,0,0.8792
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,128,1,0,0.9138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,256,1,0,0.9775
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,512,1,0,1.2340
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1024,1,0,2.0570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,2048,1,0,4.6695
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,4096,1,0,77.1006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,0,0.8617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,4,1,0,0.8530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,8,1,0,0.8581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,16,1,0,0.8528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,32,1,0,0.8707
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,64,1,0,0.9190
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,128,1,0,0.9922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,256,1,0,1.1971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,512,1,0,1.9210
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1024,1,0,4.0602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,2048,1,0,9.7660
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,4096,1,0,197.7211
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,0,0.8572
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,4,1,0,0.8597
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,8,1,0,0.8516
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,16,1,0,0.8629
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,32,1,0,0.9117
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,64,1,0,0.9766
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,128,1,0,1.1795
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,256,1,0,1.8477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,512,1,0,3.7121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1024,1,0,8.4793
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,2048,1,0,19.2516
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,0,0.8780
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,4,1,0,0.9114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,8,1,0,0.9192
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,16,1,0,0.9740
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,32,1,0,1.0487
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,64,1,0,1.2383
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,128,1,0,1.8228
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,256,1,0,3.5450
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,512,1,0,7.9732
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1024,1,0,17.1035
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,0,1.0192
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,4,1,0,1.0373
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,8,1,0,1.0789
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,16,1,0,1.1559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,32,1,0,1.3514
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,64,1,0,1.9357
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,128,1,0,3.5218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,256,1,0,7.5385
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,512,1,0,16.0006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,0,1.2570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,4,1,0,1.3321
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,8,1,0,1.3950
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,16,1,0,1.5777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,32,1,0,2.1636
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,64,1,0,3.7275
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,128,1,0,7.3548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,256,1,0,15.3540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,0,1.7410
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,4,1,0,1.8657
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,8,1,0,2.0546
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,16,1,0,2.6525
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,32,1,0,4.2275
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,64,1,0,8.0605
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,128,1,0,15.1220
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,0,0.8333
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,4,1,0,0.8679
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,8,1,0,0.8849
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,16,1,0,0.8777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,32,1,0,0.8873
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,64,1,0,0.8744
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,128,1,0,0.8610
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,256,1,0,0.8765
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,512,1,0,0.8680
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1024,1,0,0.8797
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,2048,1,0,0.9047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,4096,1,0,4.6528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,8192,1,0,8.7409
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,16384,1,0,18.8235
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,0,0.8524
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,4,1,0,0.8505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,8,1,0,0.8512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,16,1,0,0.8567
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,32,1,0,0.8459
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,64,1,0,0.8632
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,128,1,0,0.8443
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,256,1,0,0.8685
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,512,1,0,0.8540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1024,1,0,0.8705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,2048,1,0,0.9169
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,4096,1,0,8.4183
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,8192,1,0,17.2041
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,16384,1,0,37.7620
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,0,0.8549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,4,1,0,0.8424
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,8,1,0,0.8582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,16,1,0,0.8856
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,32,1,0,0.8477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,64,1,0,0.8530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,128,1,0,0.8647
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,256,1,0,0.8621
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,512,1,0,0.8787
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1024,1,0,0.8961
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,2048,1,0,1.0940
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,4096,1,0,16.1987
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,8192,1,0,34.3226
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,16384,1,0,81.2938
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,0,0.8579
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,4,1,0,0.8548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,8,1,0,0.8506
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,16,1,0,0.8571
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,32,1,0,0.8611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,64,1,0,0.8650
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,128,1,0,0.8635
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,256,1,0,0.8917
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,512,1,0,0.9171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1024,1,0,1.0411
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,2048,1,0,1.4377
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,4096,1,0,32.2905
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,8192,1,0,75.8840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,0,0.8726
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,4,1,0,0.8739
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,8,1,0,0.8747
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,16,1,0,0.8630
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,32,1,0,0.8742
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,64,1,0,0.8634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,128,1,0,0.8918
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,256,1,0,0.9294
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,512,1,0,1.0361
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1024,1,0,1.3205
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,2048,1,0,2.6212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,4096,1,0,71.6219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,0,0.8545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,4,1,0,0.8465
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,8,1,0,0.8612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,16,1,0,0.8629
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,32,1,0,0.8650
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,64,1,0,0.8843
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,128,1,0,0.9110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,256,1,0,0.9908
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,512,1,0,1.2442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1024,1,0,2.3585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,2048,1,0,5.3095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,4096,1,0,187.2602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,0,0.8407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,4,1,0,0.8512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,8,1,0,0.8511
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,16,1,0,0.8468
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,32,1,0,0.8730
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,64,1,0,0.9172
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,128,1,0,1.0018
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,256,1,0,1.2130
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,512,1,0,2.2227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1024,1,0,4.7711
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,2048,1,0,11.0092
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,0,0.8362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,4,1,0,0.8568
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,8,1,0,0.8537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,16,1,0,0.8679
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,32,1,0,0.8959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,64,1,0,0.9881
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,128,1,0,1.1931
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,256,1,0,2.1498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,512,1,0,4.4505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1024,1,0,10.0256
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,0,0.8870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,4,1,0,0.8997
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,8,1,0,0.9214
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,16,1,0,0.9584
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,32,1,0,1.0455
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,64,1,0,1.2419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,128,1,0,2.1294
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,256,1,0,4.2898
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,512,1,0,9.3164
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,0,1.0206
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,4,1,0,1.0568
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,8,1,0,1.0839
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,16,1,0,1.1745
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,32,1,0,1.3682
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,64,1,0,2.2427
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,128,1,0,4.2149
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,256,1,0,8.9263
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,0,1.2522
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,4,1,0,1.4120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,8,1,0,1.4298
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,16,1,0,1.6184
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,32,1,0,2.4731
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,64,1,0,4.4540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,128,1,0,8.8040
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,0,0.8483
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,4,1,0,0.8567
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,8,1,0,0.8711
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,16,1,0,0.8707
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,32,1,0,0.8776
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,64,1,0,0.8536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,128,1,0,0.8677
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,256,1,0,0.9030
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,512,1,0,0.8675
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1024,1,0,0.8638
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,2048,1,0,0.8773
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,4096,1,0,4.4697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,8192,1,0,8.4307
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,16384,1,0,18.2060
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,0,0.8645
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,4,1,0,0.8496
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,8,1,0,0.8546
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,16,1,0,0.8512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,32,1,0,0.8482
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,64,1,0,0.8543
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,128,1,0,0.8537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,256,1,0,0.8598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,512,1,0,0.8609
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1024,1,0,0.8650
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,2048,1,0,0.8726
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,4096,1,0,7.8736
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,8192,1,0,16.5137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,16384,1,0,36.1540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,0,0.8650
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,4,1,0,0.8714
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,8,1,0,0.8614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,16,1,0,0.8771
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,32,1,0,0.8545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,64,1,0,0.8698
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,128,1,0,0.8596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,256,1,0,0.8448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,512,1,0,0.8622
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1024,1,0,0.8700
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,2048,1,0,0.9256
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,4096,1,0,15.4290
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,8192,1,0,32.7076
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,16384,1,0,78.1197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,0,0.8588
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,4,1,0,0.8595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,8,1,0,0.8604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,16,1,0,0.8652
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,32,1,0,0.8605
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,64,1,0,0.8638
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,128,1,0,0.8570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,256,1,0,0.8683
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,512,1,0,0.8823
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1024,1,0,0.9246
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,2048,1,0,1.1226
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,4096,1,0,30.9887
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,8192,1,0,72.4812
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,0,0.8528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,4,1,0,0.8442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,8,1,0,0.8465
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,16,1,0,0.8594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,32,1,0,0.8613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,64,1,0,0.9378
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,128,1,0,0.8595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,256,1,0,0.8739
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,512,1,0,0.9229
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1024,1,0,1.0492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,2048,1,0,1.6957
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,4096,1,0,67.8835
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,0,0.8563
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,4,1,0,0.8542
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,8,1,0,0.8548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,16,1,0,0.8654
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,32,1,0,0.8550
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,64,1,0,0.8647
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,128,1,0,0.8755
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,256,1,0,0.9308
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,512,1,0,1.0219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1024,1,0,1.5712
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,2048,1,0,3.3047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,4096,1,0,183.8201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,0,0.8645
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,4,1,0,0.8575
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,8,1,0,0.8633
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,16,1,0,0.8624
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,32,1,0,0.8610
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,64,1,0,0.8802
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,128,1,0,0.9364
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,256,1,0,1.0217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,512,1,0,1.5061
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1024,1,0,3.0448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,2048,1,0,6.7520
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,0,0.8649
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,4,1,0,0.8634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,8,1,0,0.8661
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,16,1,0,0.8750
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,32,1,0,0.8823
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,64,1,0,0.9380
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,128,1,0,1.0213
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,256,1,0,1.4738
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,512,1,0,2.9113
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1024,1,0,6.1757
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,0,0.8657
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,4,1,0,0.8655
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,8,1,0,0.8641
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,16,1,0,0.8862
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,32,1,0,0.9413
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,64,1,0,1.0255
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,128,1,0,1.4650
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,256,1,0,2.8361
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,512,1,0,5.8330
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,0,0.9017
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,4,1,0,0.9119
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,8,1,0,0.9407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,16,1,0,0.9789
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,32,1,0,1.0711
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,64,1,0,1.5216
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,128,1,0,2.8213
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,256,1,0,5.6503
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,0,1.0254
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,4,1,0,1.0547
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,8,1,0,1.1101
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,16,1,0,1.1976
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,32,1,0,1.6307
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,64,1,0,2.9194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,128,1,0,5.6098
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,0,0.7580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,4,1,0,0.7627
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,8,1,0,0.7397
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,16,1,0,0.7415
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,32,1,0,0.7294
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,64,1,0,0.7324
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,128,1,0,0.7319
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,256,1,0,0.7574
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,512,1,0,0.8098
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1024,1,0,0.9277
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,2048,1,0,1.3829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,4096,1,0,7.2556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,8192,1,0,14.1618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,16384,1,0,30.1669
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,0,0.7574
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,4,1,0,0.7443
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,8,1,0,0.7394
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,16,1,0,0.7328
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,32,1,0,0.7310
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,64,1,0,0.7311
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,128,1,0,0.7541
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,256,1,0,0.8040
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,512,1,0,0.8808
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1024,1,0,1.2569
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,2048,1,0,2.0741
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,4096,1,0,13.3143
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,8192,1,0,28.3568
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,16384,1,0,62.0225
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,0,0.7458
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,4,1,0,0.7402
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,8,1,0,0.7259
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,16,1,0,0.7396
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,32,1,0,0.7429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,64,1,0,0.7498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,128,1,0,0.8028
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,256,1,0,0.8929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,512,1,0,1.1931
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1024,1,0,1.8350
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,2048,1,0,4.1099
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,4096,1,0,27.0980
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,8192,1,0,59.4763
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,0,0.7680
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,4,1,0,0.7623
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,8,1,0,0.7612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,16,1,0,0.7641
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,32,1,0,0.7606
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,64,1,0,0.7541
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,128,1,0,0.7645
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,256,1,0,0.7486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,512,1,0,0.7862
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1024,1,0,0.8374
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,2048,1,0,1.0177
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,4096,1,0,3.9799
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,8192,1,0,7.7795
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,16384,1,0,17.9013
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,0,0.7626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,4,1,0,0.7629
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,8,1,0,0.7582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,16,1,0,0.7459
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,32,1,0,0.7528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,64,1,0,0.7553
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,128,1,0,0.7507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,256,1,0,0.7682
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,512,1,0,0.8364
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1024,1,0,0.9441
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,2048,1,0,1.3158
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,4096,1,0,7.5521
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,8192,1,0,16.1436
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,16384,1,0,35.7402
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,0,0.7484
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,4,1,0,0.7390
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,8,1,0,0.7670
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,16,1,0,0.7428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,32,1,0,0.7649
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,64,1,0,0.7643
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,128,1,0,0.7834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,256,1,0,0.8263
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,512,1,0,0.9208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1024,1,0,1.1881
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,2048,1,0,2.1251
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,4096,1,0,14.9123
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,8192,1,0,32.0875
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,16384,1,0,71.1471
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,0,0.7544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,4,1,0,0.7528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,8,1,0,0.7632
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,16,1,0,0.7529
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,32,1,0,0.7616
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,64,1,0,0.7842
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,128,1,0,0.8302
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,256,1,0,0.9249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,512,1,0,1.1240
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1024,1,0,1.8736
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,2048,1,0,4.2654
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,4096,1,0,29.9194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,8192,1,0,66.6283
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,0,0.7579
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,4,1,0,0.7690
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,8,1,0,0.7508
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,16,1,0,0.7530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,32,1,0,0.7777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,64,1,0,0.8509
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,128,1,0,0.9826
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,256,1,0,1.1016
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,512,1,0,1.7493
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1024,1,0,3.7363
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,2048,1,0,8.8042
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,4096,1,0,62.6799
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,0,0.7799
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,4,1,0,0.7711
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,8,1,0,0.7846
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,16,1,0,0.8231
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,32,1,0,0.8353
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,64,1,0,0.9428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,128,1,0,1.1097
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,256,1,0,1.6808
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,512,1,0,3.4203
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1024,1,0,7.7695
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,2048,1,0,17.8538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,0,0.7528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,4,1,0,0.7655
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,8,1,0,0.7636
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,16,1,0,0.7662
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,32,1,0,0.7643
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,64,1,0,0.7676
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,128,1,0,0.7577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,256,1,0,0.7638
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,512,1,0,0.7663
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1024,1,0,0.7807
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,2048,1,0,0.8381
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,4096,1,0,3.8105
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,8192,1,0,7.1230
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,16384,1,0,16.2994
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,0,0.8164
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,4,1,0,0.7596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,8,1,0,0.7666
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,16,1,0,0.7664
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,32,1,0,0.7517
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,64,1,0,0.7410
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,128,1,0,0.7466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,256,1,0,0.7597
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,512,1,0,0.7802
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1024,1,0,0.8137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,2048,1,0,0.9819
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,4096,1,0,6.6331
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,8192,1,0,14.6920
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,16384,1,0,32.4265
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,0,0.7464
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,4,1,0,0.7506
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,8,1,0,0.7657
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,16,1,0,0.7581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,32,1,0,0.7604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,64,1,0,0.7634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,128,1,0,0.7614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,256,1,0,0.7899
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,512,1,0,0.8404
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1024,1,0,0.9249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,2048,1,0,1.3323
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,4096,1,0,13.4496
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,8192,1,0,29.2987
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,16384,1,0,66.1157
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,0,0.7521
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,4,1,0,0.7675
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,8,1,0,0.7558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,16,1,0,0.7550
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,32,1,0,0.7722
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,64,1,0,0.7688
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,128,1,0,0.7839
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,256,1,0,0.8291
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,512,1,0,0.8943
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1024,1,0,1.1973
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,2048,1,0,2.2249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,4096,1,0,27.2538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,8192,1,0,62.5217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,0,0.7490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,4,1,0,0.7575
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,8,1,0,0.7538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,16,1,0,0.7535
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,32,1,0,0.7533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,64,1,0,0.7972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,128,1,0,0.8328
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,256,1,0,0.8938
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,512,1,0,1.1461
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1024,1,0,1.9795
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,2048,1,0,4.5686
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,4096,1,0,54.6185
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,0,0.7593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,4,1,0,0.7543
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,8,1,0,0.7542
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,16,1,0,0.7567
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,32,1,0,0.7916
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,64,1,0,0.8296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,128,1,0,0.8957
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,256,1,0,1.1033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,512,1,0,1.8494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1024,1,0,3.9456
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,2048,1,0,9.5579
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,0,0.7651
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,4,1,0,0.7553
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,8,1,0,0.7533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,16,1,0,0.7894
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,32,1,0,0.8244
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,64,1,0,0.8899
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,128,1,0,1.0899
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,256,1,0,1.7760
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,512,1,0,3.5360
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1024,1,0,8.3304
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,2048,1,0,18.9501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,0,0.8053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,4,1,0,0.8092
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,8,1,0,0.8238
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,16,1,0,0.8799
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,32,1,0,0.9544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,64,1,0,1.1466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,128,1,0,1.7488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,256,1,0,3.4783
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,512,1,0,7.6456
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1024,1,0,16.6054
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,0,0.9247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,4,1,0,0.9463
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,8,1,0,1.0049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,16,1,0,1.0591
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,32,1,0,1.2625
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,64,1,0,1.8622
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,128,1,0,3.4112
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,256,1,0,7.3936
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,512,1,0,15.4922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,0,1.1964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,4,1,0,1.2456
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,8,1,0,1.3058
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,16,1,0,1.4992
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,32,1,0,2.0937
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,64,1,0,3.5868
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,128,1,0,7.0558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,256,1,0,15.0173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,0,1.6614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,4,1,0,1.7823
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,8,1,0,1.9678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,16,1,0,2.5566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,32,1,0,4.1588
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,64,1,0,7.8064
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,128,1,0,14.6581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,0,0.7842
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,4,1,0,0.7798
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,8,1,0,0.7703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,16,1,0,0.7650
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,32,1,0,0.7666
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,64,1,0,0.7712
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,128,1,0,0.7730
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,256,1,0,0.7615
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,512,1,0,0.7745
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1024,1,0,0.7737
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,2048,1,0,0.7942
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,4096,1,0,3.6339
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,8192,1,0,6.7111
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,16384,1,0,14.8237
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,0,0.9787
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,4,1,0,0.9886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,8,1,0,0.9182
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,16,1,0,0.7793
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,32,1,0,0.7652
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,64,1,0,0.7623
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,128,1,0,0.7625
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,256,1,0,0.7781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,512,1,0,0.7726
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1024,1,0,0.7889
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,2048,1,0,0.8158
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,4096,1,0,6.1309
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,8192,1,0,13.1383
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,16384,1,0,29.5827
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,0,0.7598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,4,1,0,0.7613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,8,1,0,0.7589
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,16,1,0,0.7656
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,32,1,0,0.7656
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,64,1,0,0.7666
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,128,1,0,0.7806
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,256,1,0,0.7767
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,512,1,0,0.7956
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1024,1,0,0.8362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,2048,1,0,1.0034
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,4096,1,0,11.9775
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,8192,1,0,26.4326
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,16384,1,0,62.4851
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,0,0.7648
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,4,1,0,0.7599
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,8,1,0,0.7591
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,16,1,0,0.7651
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,32,1,0,0.7626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,64,1,0,0.7866
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,128,1,0,0.7686
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,256,1,0,0.7968
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,512,1,0,0.8258
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1024,1,0,0.9430
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,2048,1,0,1.3563
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,4096,1,0,24.4538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,8192,1,0,55.3350
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,0,0.7563
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,4,1,0,0.7519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,8,1,0,0.7761
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,16,1,0,0.7545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,32,1,0,0.7701
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,64,1,0,0.7777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,128,1,0,0.7947
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,256,1,0,0.8303
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,512,1,0,0.9166
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1024,1,0,1.2184
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,2048,1,0,2.4656
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,4096,1,0,48.2157
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,0,0.7676
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,4,1,0,0.7499
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,8,1,0,0.7622
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,16,1,0,0.7659
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,32,1,0,0.7817
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,64,1,0,0.7962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,128,1,0,0.8082
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,256,1,0,0.9048
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,512,1,0,1.1554
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1024,1,0,2.2128
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,2048,1,0,5.0306
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,0,0.7548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,4,1,0,0.7679
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,8,1,0,0.7626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,16,1,0,0.7730
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,32,1,0,0.7782
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,64,1,0,0.8227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,128,1,0,0.9036
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,256,1,0,1.1274
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,512,1,0,2.0855
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1024,1,0,4.5725
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,2048,1,0,10.5206
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,0,0.7612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,4,1,0,0.7744
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,8,1,0,0.7650
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,16,1,0,0.7838
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,32,1,0,0.8331
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,64,1,0,0.9064
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,128,1,0,1.1164
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,256,1,0,2.0185
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,512,1,0,4.2738
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1024,1,0,9.5721
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,0,0.7925
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,4,1,0,0.8121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,8,1,0,0.8322
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,16,1,0,0.8714
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,32,1,0,0.9532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,64,1,0,1.1579
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,128,1,0,1.9995
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,256,1,0,4.0300
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,512,1,0,8.9134
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,0,0.9348
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,4,1,0,0.9710
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,8,1,0,0.9922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,16,1,0,1.0903
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,32,1,0,1.2930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,64,1,0,2.1046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,128,1,0,3.9695
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,256,1,0,8.5108
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,0,1.1718
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,4,1,0,1.2641
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,8,1,0,1.3408
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,16,1,0,1.5317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,32,1,0,2.3534
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,64,1,0,4.1755
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,128,1,0,8.4146
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,0,0.7599
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,4,1,0,0.7545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,8,1,0,0.7635
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,16,1,0,0.7577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,32,1,0,0.7626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,64,1,0,0.7556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,128,1,0,0.7658
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,256,1,0,0.7674
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,512,1,0,0.7668
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1024,1,0,0.7602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,2048,1,0,0.7705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,4096,1,0,3.6440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,8192,1,0,6.2287
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,16384,1,0,14.2033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,0,0.8495
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,4,1,0,0.8232
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,8,1,0,0.8112
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,16,1,0,0.7580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,32,1,0,0.7580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,64,1,0,0.7576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,128,1,0,0.7654
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,256,1,0,0.7713
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,512,1,0,0.7642
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1024,1,0,0.7766
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,2048,1,0,0.7878
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,4096,1,0,5.7094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,8192,1,0,12.6326
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,16384,1,0,28.0152
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,0,0.7703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,4,1,0,0.7609
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,8,1,0,0.7689
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,16,1,0,0.7600
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,32,1,0,0.7543
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,64,1,0,0.7596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,128,1,0,0.7561
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,256,1,0,0.7516
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,512,1,0,0.7740
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1024,1,0,0.7944
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,2048,1,0,0.8423
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,4096,1,0,11.4974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,8192,1,0,24.7522
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,16384,1,0,60.5035
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,0,0.7512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,4,1,0,0.7560
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,8,1,0,0.7556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,16,1,0,0.7539
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,32,1,0,0.7641
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,64,1,0,0.7626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,128,1,0,0.7690
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,256,1,0,0.7743
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,512,1,0,0.7772
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1024,1,0,0.8312
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,2048,1,0,1.0115
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,4096,1,0,22.8349
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,8192,1,0,52.2843
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,0,0.7689
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,4,1,0,0.7506
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,8,1,0,0.7606
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,16,1,0,0.7537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,32,1,0,0.7567
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,64,1,0,0.7560
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,128,1,0,0.7519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,256,1,0,0.7749
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,512,1,0,0.8256
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1024,1,0,0.9466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,2048,1,0,1.5614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,4096,1,0,45.5847
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,0,0.7571
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,4,1,0,0.7616
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,8,1,0,0.7555
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,16,1,0,0.7562
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,32,1,0,0.7666
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,64,1,0,0.7645
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,128,1,0,0.7816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,256,1,0,0.8354
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,512,1,0,0.9180
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1024,1,0,1.4360
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,2048,1,0,3.0580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,0,0.7585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,4,1,0,0.7504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,8,1,0,0.7506
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,16,1,0,0.7661
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,32,1,0,0.7618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,64,1,0,0.7744
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,128,1,0,0.8363
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,256,1,0,0.9133
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,512,1,0,1.3727
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1024,1,0,2.7875
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,2048,1,0,6.2173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,0,0.7470
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,4,1,0,0.7464
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,8,1,0,0.7542
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,16,1,0,0.7646
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,32,1,0,0.7762
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,64,1,0,0.8262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,128,1,0,0.9099
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,256,1,0,1.3408
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,512,1,0,2.6552
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1024,1,0,5.6963
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,0,0.7584
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,4,1,0,0.7610
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,8,1,0,0.7527
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,16,1,0,0.7756
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,32,1,0,0.8225
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,64,1,0,0.9120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,128,1,0,1.3333
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,256,1,0,2.5895
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,512,1,0,5.3301
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,0,0.7850
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,4,1,0,0.8052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,8,1,0,0.8237
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,16,1,0,0.8816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,32,1,0,0.9878
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,64,1,0,1.3863
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,128,1,0,2.5663
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,256,1,0,5.2406
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,0,0.9252
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,4,1,0,0.9577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,8,1,0,0.9980
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,16,1,0,1.1121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,32,1,0,1.5023
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,64,1,0,2.6714
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,128,1,0,5.1666
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,0,0.8301
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,4,1,0,0.8578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,8,1,0,0.8411
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,16,1,0,0.8534
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,32,1,0,0.8468
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,64,1,0,0.8403
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,128,1,0,0.8414
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,256,1,0,0.8494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,512,1,0,0.9029
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1024,1,0,1.0166
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,2048,1,0,1.4703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,4096,1,0,8.3864
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,8192,1,0,16.9621
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,16384,1,0,35.3188
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,0,0.8459
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,4,1,0,0.8449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,8,1,0,0.8382
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,16,1,0,0.8439
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,32,1,0,0.8254
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,64,1,0,0.8362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,128,1,0,0.8454
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,256,1,0,0.8965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,512,1,0,0.9899
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1024,1,0,1.3467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,2048,1,0,2.1662
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,4096,1,0,16.1492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,8192,1,0,33.5714
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,16384,1,0,71.9353
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,0,0.8399
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,4,1,0,0.8286
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,8,1,0,0.8377
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,16,1,0,0.8175
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,32,1,0,0.8292
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,64,1,0,0.8462
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,128,1,0,0.8908
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,256,1,0,0.9781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,512,1,0,1.2772
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1024,1,0,1.9256
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,2048,1,0,4.2276
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,4096,1,0,31.9351
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,8192,1,0,70.1277
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,0,0.8419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,4,1,0,0.8649
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,8,1,0,0.8665
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,16,1,0,0.8643
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,32,1,0,0.8577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,64,1,0,0.8527
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,128,1,0,0.8560
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,256,1,0,0.8569
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,512,1,0,0.8761
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1024,1,0,0.9328
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,2048,1,0,1.1030
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,4096,1,0,4.9943
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,8192,1,0,9.9689
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,16384,1,0,21.4072
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,0,0.8563
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,4,1,0,0.8694
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,8,1,0,0.8644
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,16,1,0,0.8498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,32,1,0,0.8551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,64,1,0,0.8585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,128,1,0,0.8648
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,256,1,0,0.8873
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,512,1,0,0.9377
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1024,1,0,1.0582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,2048,1,0,1.4197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,4096,1,0,9.3936
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,8192,1,0,19.8048
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,16384,1,0,45.6066
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,0,0.8557
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,4,1,0,0.8399
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,8,1,0,0.8522
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,16,1,0,0.8913
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,32,1,0,0.8554
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,64,1,0,0.8450
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,128,1,0,0.8822
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,256,1,0,0.9211
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,512,1,0,1.0080
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1024,1,0,1.2729
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,2048,1,0,2.2312
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,4096,1,0,18.7423
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,8192,1,0,40.0294
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,16384,1,0,93.6311
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,0,0.8558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,4,1,0,0.8642
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,8,1,0,0.8588
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,16,1,0,0.8868
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,32,1,0,0.8566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,64,1,0,0.8784
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,128,1,0,0.9251
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,256,1,0,1.0217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,512,1,0,1.2113
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1024,1,0,1.9573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,2048,1,0,4.2234
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,4096,1,0,37.5276
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,8192,1,0,86.0152
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,0,0.8502
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,4,1,0,0.8532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,8,1,0,0.8573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,16,1,0,0.8468
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,32,1,0,0.8702
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,64,1,0,0.9186
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,128,1,0,1.0134
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,256,1,0,1.1825
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,512,1,0,1.8236
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1024,1,0,3.7126
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,2048,1,0,9.0068
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,4096,1,0,82.9129
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,0,0.8492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,4,1,0,0.8588
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,8,1,0,0.8472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,16,1,0,0.8867
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,32,1,0,0.9271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,64,1,0,1.0144
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,128,1,0,1.1678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,256,1,0,1.7566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,512,1,0,3.4693
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1024,1,0,7.8417
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,2048,1,0,18.0967
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,0,0.8304
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,4,1,0,0.8583
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,8,1,0,0.8550
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,16,1,0,0.8591
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,32,1,0,0.8535
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,64,1,0,0.8444
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,128,1,0,0.8488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,256,1,0,0.8548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,512,1,0,0.8554
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1024,1,0,0.8686
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,2048,1,0,0.9295
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,4096,1,0,4.8426
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,8192,1,0,9.3872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,16384,1,0,20.3653
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,0,0.8628
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,4,1,0,0.8441
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,8,1,0,0.8456
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,16,1,0,0.8444
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,32,1,0,0.8414
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,64,1,0,0.8556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,128,1,0,0.8428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,256,1,0,0.8496
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,512,1,0,0.8705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1024,1,0,0.9257
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,2048,1,0,1.0688
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,4096,1,0,8.7390
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,8192,1,0,18.5668
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,16384,1,0,41.3681
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,0,0.8482
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,4,1,0,0.8421
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,8,1,0,0.8456
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,16,1,0,0.8722
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,32,1,0,0.8460
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,64,1,0,0.8735
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,128,1,0,0.8621
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,256,1,0,0.8830
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,512,1,0,0.9289
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1024,1,0,1.0195
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,2048,1,0,1.4301
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,4096,1,0,17.4215
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,8192,1,0,37.0351
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,16384,1,0,86.5860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,0,0.8467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,4,1,0,0.8578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,8,1,0,0.8586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,16,1,0,0.8567
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,32,1,0,0.8633
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,64,1,0,0.8710
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,128,1,0,0.8859
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,256,1,0,0.9350
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,512,1,0,0.9719
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1024,1,0,1.3015
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,2048,1,0,2.3045
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,4096,1,0,35.2063
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,8192,1,0,80.1041
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,0,0.8563
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,4,1,0,0.8444
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,8,1,0,0.8506
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,16,1,0,0.8411
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,32,1,0,0.8498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,64,1,0,0.8718
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,128,1,0,0.9190
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,256,1,0,0.9799
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,512,1,0,1.2319
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1024,1,0,2.0472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,2048,1,0,4.5967
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,4096,1,0,76.9968
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,0,0.8437
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,4,1,0,0.8516
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,8,1,0,0.8427
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,16,1,0,0.8497
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,32,1,0,0.8755
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,64,1,0,0.9182
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,128,1,0,0.9812
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,256,1,0,1.1952
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,512,1,0,1.9275
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1024,1,0,4.1002
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,2048,1,0,9.6604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,4096,1,0,197.3713
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,0,0.8449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,4,1,0,0.8495
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,8,1,0,0.8427
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,16,1,0,0.8703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,32,1,0,0.9077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,64,1,0,0.9819
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,128,1,0,1.1751
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,256,1,0,1.8508
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,512,1,0,3.7616
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1024,1,0,8.5486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,2048,1,0,19.4488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,0,0.8722
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,4,1,0,0.8830
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,8,1,0,0.9204
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,16,1,0,0.9734
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,32,1,0,1.0319
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,64,1,0,1.2404
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,128,1,0,1.8236
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,256,1,0,3.5588
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,512,1,0,7.7465
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1024,1,0,17.1563
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,0,1.0207
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,4,1,0,1.0514
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,8,1,0,1.0925
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,16,1,0,1.1598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,32,1,0,1.3550
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,64,1,0,1.9347
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,128,1,0,3.4822
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,256,1,0,7.4525
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,512,1,0,15.8406
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,0,1.2613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,4,1,0,1.3265
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,8,1,0,1.3985
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,16,1,0,1.5913
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,32,1,0,2.1646
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,64,1,0,3.7930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,128,1,0,7.2348
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,256,1,0,15.3261
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,0,1.7371
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,4,1,0,1.8751
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,8,1,0,2.0604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,16,1,0,2.6479
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,32,1,0,4.3819
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,64,1,0,7.8867
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,128,1,0,15.1420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,0,0.8671
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,4,1,0,0.8798
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,8,1,0,0.8746
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,16,1,0,0.9023
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,32,1,0,0.8861
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,64,1,0,0.8730
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,128,1,0,0.8700
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,256,1,0,0.8796
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,512,1,0,0.8726
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1024,1,0,0.8800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,2048,1,0,0.9010
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,4096,1,0,5.7565
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,8192,1,0,9.3718
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,16384,1,0,18.7493
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,0,0.8660
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,4,1,0,0.8605
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,8,1,0,0.8681
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,16,1,0,0.8639
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,32,1,0,0.8558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,64,1,0,0.8552
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,128,1,0,0.8603
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,256,1,0,0.8648
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,512,1,0,0.8633
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1024,1,0,0.9006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,2048,1,0,0.9239
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,4096,1,0,8.1687
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,8192,1,0,17.2106
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,16384,1,0,37.7014
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,0,0.8588
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,4,1,0,0.8575
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,8,1,0,0.8624
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,16,1,0,0.9009
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,32,1,0,0.8773
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,64,1,0,0.8743
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,128,1,0,0.8786
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,256,1,0,0.8641
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,512,1,0,0.8904
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1024,1,0,0.9409
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,2048,1,0,1.1003
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,4096,1,0,16.0664
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,8192,1,0,34.2115
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,16384,1,0,81.1110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,0,0.8549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,4,1,0,0.8639
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,8,1,0,0.8542
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,16,1,0,0.8643
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,32,1,0,0.8574
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,64,1,0,0.8672
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,128,1,0,0.8668
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,256,1,0,0.8904
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,512,1,0,0.9195
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1024,1,0,1.0318
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,2048,1,0,1.4449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,4096,1,0,32.3613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,8192,1,0,75.6106
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,0,0.8582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,4,1,0,0.8642
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,8,1,0,0.8665
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,16,1,0,0.8614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,32,1,0,0.8635
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,64,1,0,0.8655
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,128,1,0,0.8820
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,256,1,0,0.9273
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,512,1,0,1.0415
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1024,1,0,1.3325
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,2048,1,0,2.6075
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,4096,1,0,71.2300
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,0,0.8589
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,4,1,0,0.8630
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,8,1,0,0.8671
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,16,1,0,0.8731
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,32,1,0,0.8741
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,64,1,0,0.8874
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,128,1,0,0.9318
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,256,1,0,1.0142
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,512,1,0,1.2516
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1024,1,0,2.3500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,2048,1,0,5.4290
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,4096,1,0,187.4049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,0,0.8793
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,4,1,0,0.8637
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,8,1,0,0.8729
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,16,1,0,0.8720
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,32,1,0,0.8823
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,64,1,0,0.9225
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,128,1,0,1.0059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,256,1,0,1.2415
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,512,1,0,2.2271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1024,1,0,4.7602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,2048,1,0,10.8611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,0,0.8556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,4,1,0,0.8648
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,8,1,0,0.8681
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,16,1,0,0.9032
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,32,1,0,0.9347
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,64,1,0,1.0023
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,128,1,0,1.2109
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,256,1,0,2.1642
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,512,1,0,4.3995
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1024,1,0,9.9612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,0,0.9049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,4,1,0,0.9181
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,8,1,0,0.9448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,16,1,0,0.9757
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,32,1,0,1.0598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,64,1,0,1.2684
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,128,1,0,2.1394
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,256,1,0,4.3006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,512,1,0,9.2077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,0,1.0219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,4,1,0,1.0634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,8,1,0,1.0966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,16,1,0,1.1774
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,32,1,0,1.3763
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,64,1,0,2.2414
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,128,1,0,4.2732
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,256,1,0,8.9243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,0,1.2823
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,4,1,0,1.3679
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,8,1,0,1.4476
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,16,1,0,1.6347
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,32,1,0,2.4766
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,64,1,0,4.4791
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,128,1,0,8.7717
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,0,0.8299
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,4,1,0,0.8607
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,8,1,0,0.8747
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,16,1,0,0.8781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,32,1,0,0.8728
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,64,1,0,0.8619
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,128,1,0,0.8683
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,256,1,0,0.8806
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,512,1,0,0.8804
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1024,1,0,0.8818
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,2048,1,0,0.8795
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,4096,1,0,4.6416
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,8192,1,0,8.5355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,16384,1,0,18.3328
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,0,1.6453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,4,1,0,1.6144
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,8,1,0,1.6278
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,16,1,0,1.6201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,32,1,0,1.6603
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,64,1,0,1.6475
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,128,1,0,1.6247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,256,1,0,0.8490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,512,1,0,0.8548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1024,1,0,0.8504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,2048,1,0,0.8848
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,4096,1,0,7.9176
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,8192,1,0,16.5024
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,16384,1,0,36.3163
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,0,0.8507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,4,1,0,0.8602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,8,1,0,0.8646
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,16,1,0,0.8903
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,32,1,0,0.8726
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,64,1,0,0.8543
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,128,1,0,0.8537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,256,1,0,0.8681
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,512,1,0,0.8580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1024,1,0,0.8954
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,2048,1,0,0.9357
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,4096,1,0,15.4389
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,8192,1,0,32.8865
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,16384,1,0,78.7094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,0,0.8568
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,4,1,0,0.8507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,8,1,0,0.8669
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,16,1,0,0.8606
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,32,1,0,0.8606
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,64,1,0,0.8686
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,128,1,0,0.8658
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,256,1,0,0.8626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,512,1,0,0.8799
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1024,1,0,0.9383
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,2048,1,0,1.1140
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,4096,1,0,31.0182
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,8192,1,0,72.7378
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,0,0.8797
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,4,1,0,0.8574
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,8,1,0,0.8500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,16,1,0,0.8437
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,32,1,0,0.8596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,64,1,0,0.8681
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,128,1,0,0.8672
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,256,1,0,0.8796
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,512,1,0,0.9257
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1024,1,0,1.0502
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,2048,1,0,1.6970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,4096,1,0,68.1330
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,0,0.8595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,4,1,0,0.8511
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,8,1,0,0.8639
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,16,1,0,0.8567
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,32,1,0,0.8674
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,64,1,0,0.8683
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,128,1,0,0.8639
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,256,1,0,0.9234
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,512,1,0,1.0278
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1024,1,0,1.5732
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,2048,1,0,3.3065
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,4096,1,0,183.4254
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,0,0.8531
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,4,1,0,0.8503
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,8,1,0,0.8481
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,16,1,0,0.8647
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,32,1,0,0.8697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,64,1,0,0.8883
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,128,1,0,0.9347
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,256,1,0,1.0270
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,512,1,0,1.5213
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1024,1,0,3.0520
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,2048,1,0,6.6736
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,0,0.8562
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,4,1,0,0.8661
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,8,1,0,0.8678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,16,1,0,0.8601
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,32,1,0,0.8760
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,64,1,0,0.9274
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,128,1,0,1.0264
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,256,1,0,1.4767
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,512,1,0,2.9228
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1024,1,0,6.1690
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,0,0.8454
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,4,1,0,0.8683
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,8,1,0,0.8503
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,16,1,0,0.8767
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,32,1,0,0.9255
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,64,1,0,1.0195
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,128,1,0,1.4662
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,256,1,0,2.8472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,512,1,0,5.8098
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,0,0.8963
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,4,1,0,0.9116
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,8,1,0,0.9318
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,16,1,0,0.9806
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,32,1,0,1.0881
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,64,1,0,1.5200
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,128,1,0,2.8196
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,256,1,0,5.6895
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,0,1.0294
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,4,1,0,1.0462
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,8,1,0,1.1082
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,16,1,0,1.2047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,32,1,0,1.6350
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,64,1,0,2.9346
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,128,1,0,5.6327
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,0,0.5136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,4,1,0,0.5135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,8,1,0,0.5247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,16,1,0,0.4960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,32,1,0,0.5042
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,64,1,0,0.5195
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,128,1,0,0.5163
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,256,1,0,0.5517
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,512,1,0,0.6110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1024,1,0,0.7913
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,2048,1,0,1.3220
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,4096,1,0,4.8020
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,8192,1,0,10.3881
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,16384,1,0,21.7827
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,0,0.4842
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,4,1,0,0.4909
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,8,1,0,0.4828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,16,1,0,0.4909
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,32,1,0,0.4903
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,64,1,0,0.4939
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,128,1,0,0.5273
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,256,1,0,0.5789
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,512,1,0,0.7589
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1024,1,0,1.2186
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,2048,1,0,2.5962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,4096,1,0,9.8800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,8192,1,0,20.7308
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,16384,1,0,46.0982
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,0,0.4888
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,4,1,0,0.4869
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,8,1,0,0.4858
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,16,1,0,0.4972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,32,1,0,0.5056
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,64,1,0,0.5311
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,128,1,0,0.5847
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,256,1,0,0.7638
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,512,1,0,1.1769
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1024,1,0,2.3944
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,2048,1,0,5.4754
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,4096,1,0,20.3267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,8192,1,0,44.9070
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,16384,1,0,90.4294
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,0,0.4794
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,4,1,0,0.4839
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,8,1,0,0.4888
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,16,1,0,0.4912
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,32,1,0,0.5138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,64,1,0,0.5714
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,128,1,0,0.7376
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,256,1,0,1.1502
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,512,1,0,2.2636
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1024,1,0,4.9067
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,2048,1,0,11.3937
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,4096,1,0,39.9079
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,8192,1,0,86.3596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,0,0.4915
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,4,1,0,0.4993
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,8,1,0,0.5046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,16,1,0,0.5309
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,32,1,0,0.6078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,64,1,0,0.7490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,128,1,0,1.1349
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,256,1,0,2.2497
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,512,1,0,4.7754
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1024,1,0,10.5492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,2048,1,0,22.8516
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,4096,1,0,83.1328
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,0,0.5215
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,4,1,0,0.5489
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,8,1,0,0.5620
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,16,1,0,0.6300
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,32,1,0,0.7849
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,64,1,0,1.1640
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,128,1,0,2.2051
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,256,1,0,4.5751
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,512,1,0,10.0482
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1024,1,0,21.0708
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,2048,1,0,52.4059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,0,0.5905
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,4,1,0,0.6336
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,8,1,0,0.6990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,16,1,0,0.8539
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,32,1,0,1.2309
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,64,1,0,2.3028
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,128,1,0,4.6689
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,256,1,0,9.8539
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,512,1,0,20.5142
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1024,1,0,48.4945
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,2048,1,0,98.3948
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,0,0.7272
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,4,1,0,0.8288
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,8,1,0,0.9890
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,16,1,0,1.3670
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,32,1,0,2.3943
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,64,1,0,4.7337
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,128,1,0,9.9450
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,256,1,0,20.1171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,512,1,0,46.3594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1024,1,0,92.2572
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,0,1.0141
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,4,1,0,1.2575
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,8,1,0,1.6273
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,16,1,0,2.6473
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,32,1,0,5.0101
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,64,1,0,9.7810
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,128,1,0,19.7385
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,256,1,0,42.7371
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,512,1,0,87.6949
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,0,1.6075
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,4,1,0,2.1842
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,8,1,0,3.2716
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,16,1,0,5.5816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,32,1,0,10.5334
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,64,1,0,20.3170
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,128,1,0,40.3043
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,256,1,0,85.9124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,0,2.7610
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,4,1,0,4.5505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,8,1,0,6.9613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,16,1,0,11.7673
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,32,1,0,21.2970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,64,1,0,40.8226
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,128,1,0,85.9926
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,0,0.4716
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,4,1,0,0.4622
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,8,1,0,0.4567
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,16,1,0,0.4540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,32,1,0,0.4599
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,64,1,0,0.4611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,128,1,0,0.4613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,256,1,0,0.4674
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,512,1,0,0.4913
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1024,1,0,0.5908
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,2048,1,0,0.8176
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,4096,1,0,4.0284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,8192,1,0,8.4262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,16384,1,0,18.1291
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,0,0.5676
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,4,1,0,0.5656
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,8,1,0,0.5743
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,16,1,0,0.4635
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,32,1,0,0.4651
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,64,1,0,0.4552
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,128,1,0,0.4627
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,256,1,0,0.4995
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,512,1,0,0.5732
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1024,1,0,0.7551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,2048,1,0,1.3173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,4096,1,0,7.9419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,8192,1,0,17.1193
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,16384,1,0,36.2395
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,0,0.4603
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,4,1,0,0.4609
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,8,1,0,0.4562
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,16,1,0,0.4577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,32,1,0,0.4469
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,64,1,0,0.4588
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,128,1,0,0.4977
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,256,1,0,0.5813
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,512,1,0,0.7529
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1024,1,0,1.2338
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,2048,1,0,2.5750
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,4096,1,0,16.2483
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,8192,1,0,34.0079
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,16384,1,0,74.3138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,0,0.4645
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,4,1,0,0.4584
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,8,1,0,0.4630
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,16,1,0,0.4648
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,32,1,0,0.4721
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,64,1,0,0.5165
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,128,1,0,0.5907
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,256,1,0,0.7401
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,512,1,0,1.1838
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1024,1,0,2.4344
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,2048,1,0,5.4988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,4096,1,0,32.6339
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,8192,1,0,69.1225
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,0,0.4596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,4,1,0,0.4588
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,8,1,0,0.4653
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,16,1,0,0.4681
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,32,1,0,0.4972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,64,1,0,0.5770
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,128,1,0,0.7286
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,256,1,0,1.1653
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,512,1,0,2.3224
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1024,1,0,5.3070
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,2048,1,0,11.5728
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,4096,1,0,66.8453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,0,0.4848
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,4,1,0,0.4802
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,8,1,0,0.4904
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,16,1,0,0.5316
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,32,1,0,0.6087
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,64,1,0,0.7420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,128,1,0,1.1657
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,256,1,0,2.3209
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,512,1,0,4.9933
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1024,1,0,10.8879
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,2048,1,0,23.6631
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,0,0.5187
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,4,1,0,0.5184
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,8,1,0,0.5609
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,16,1,0,0.6343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,32,1,0,0.7737
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,64,1,0,1.1743
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,128,1,0,2.2629
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,256,1,0,4.8797
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,512,1,0,10.3780
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1024,1,0,21.6549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,2048,1,0,52.4110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,0,0.6335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,4,1,0,0.6551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,8,1,0,0.7158
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,16,1,0,0.8455
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,32,1,0,1.2436
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,64,1,0,2.3112
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,128,1,0,4.8262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,256,1,0,10.3158
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,512,1,0,20.8949
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1024,1,0,48.7901
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,0,0.7872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,4,1,0,0.9215
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,8,1,0,1.0305
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,16,1,0,1.3706
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,32,1,0,2.4512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,64,1,0,4.8505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,128,1,0,10.2052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,256,1,0,20.6033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,512,1,0,45.2750
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,0,1.1803
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,4,1,0,1.2543
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,8,1,0,1.6504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,16,1,0,2.7622
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,32,1,0,5.0850
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,64,1,0,10.1296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,128,1,0,20.3788
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,256,1,0,41.0829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,0,1.6333
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,4,1,0,2.1999
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,8,1,0,3.3433
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,16,1,0,5.8392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,32,1,0,10.9239
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,64,1,0,20.1735
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,128,1,0,40.5808
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,0,0.4644
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,4,1,0,0.4545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,8,1,0,0.4491
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,16,1,0,0.4517
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,32,1,0,0.4582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,64,1,0,0.4499
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,128,1,0,0.4549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,256,1,0,0.4566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,512,1,0,0.4574
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1024,1,0,0.5007
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,2048,1,0,0.6268
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,4096,1,0,3.4757
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,8192,1,0,6.8320
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,16384,1,0,14.9050
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,0,0.5031
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,4,1,0,0.5081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,8,1,0,0.4984
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,16,1,0,0.5124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,32,1,0,0.4955
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,64,1,0,0.4952
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,128,1,0,0.5110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,256,1,0,0.5141
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,512,1,0,0.5158
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1024,1,0,0.6008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,2048,1,0,0.8183
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,4096,1,0,6.7443
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,8192,1,0,13.7312
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,16384,1,0,30.1279
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,0,0.4668
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,4,1,0,0.4705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,8,1,0,0.4774
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,16,1,0,0.4762
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,32,1,0,0.4735
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,64,1,0,0.4724
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,128,1,0,0.4784
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,256,1,0,0.5123
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,512,1,0,0.5984
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1024,1,0,0.7840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,2048,1,0,1.3662
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,4096,1,0,15.0507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,8192,1,0,27.9228
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,16384,1,0,62.3128
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,0,0.4535
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,4,1,0,0.4542
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,8,1,0,0.4532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,16,1,0,0.4564
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,32,1,0,0.4547
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,64,1,0,0.4713
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,128,1,0,0.5072
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,256,1,0,0.5832
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,512,1,0,0.7417
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1024,1,0,1.2715
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,2048,1,0,2.7698
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,4096,1,0,26.5095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,8192,1,0,58.3144
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,0,0.4537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,4,1,0,0.4490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,8,1,0,0.4518
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,16,1,0,0.4494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,32,1,0,0.4652
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,64,1,0,0.5031
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,128,1,0,0.5778
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,256,1,0,0.7307
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,512,1,0,1.2223
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1024,1,0,2.5357
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,2048,1,0,5.8115
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,4096,1,0,53.0610
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,0,0.4507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,4,1,0,0.4529
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,8,1,0,0.4544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,16,1,0,0.4660
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,32,1,0,0.5087
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,64,1,0,0.5774
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,128,1,0,0.7284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,256,1,0,1.2090
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,512,1,0,2.5286
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1024,1,0,5.4454
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,2048,1,0,12.1390
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,0,0.4772
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,4,1,0,0.4655
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,8,1,0,0.4756
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,16,1,0,0.5233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,32,1,0,0.5869
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,64,1,0,0.7334
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,128,1,0,1.1988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,256,1,0,2.4423
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,512,1,0,5.2415
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1024,1,0,11.2094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,2048,1,0,24.2821
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,0,0.5085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,4,1,0,0.5288
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,8,1,0,0.5531
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,16,1,0,0.6282
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,32,1,0,0.7671
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,64,1,0,1.2190
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,128,1,0,2.4096
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,256,1,0,5.1306
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,512,1,0,10.7293
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1024,1,0,22.4361
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,0,0.5988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,4,1,0,0.6362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,8,1,0,0.6949
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,16,1,0,0.8415
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,32,1,0,1.2878
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,64,1,0,2.4124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,128,1,0,4.9741
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,256,1,0,10.5771
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,512,1,0,21.4445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,0,0.7559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,4,1,0,0.8454
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,8,1,0,0.9701
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,16,1,0,1.4176
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,32,1,0,2.5398
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,64,1,0,5.0767
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,128,1,0,10.0307
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,256,1,0,21.1249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,0,1.0074
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,4,1,0,1.2500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,8,1,0,1.6962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,16,1,0,2.8265
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,32,1,0,5.0013
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,64,1,0,10.2162
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,128,1,0,20.6698
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,0,0.4607
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,4,1,0,0.4588
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,8,1,0,0.4445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,16,1,0,0.4500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,32,1,0,0.4518
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,64,1,0,0.4475
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,128,1,0,0.4472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,256,1,0,0.4478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,512,1,0,0.4507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1024,1,0,0.4537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,2048,1,0,0.5170
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,4096,1,0,3.3278
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,8192,1,0,5.8525
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,16384,1,0,13.4256
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,0,0.4682
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,4,1,0,0.4942
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,8,1,0,0.4881
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,16,1,0,0.4936
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,32,1,0,0.4825
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,64,1,0,0.4872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,128,1,0,0.4906
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,256,1,0,0.4902
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,512,1,0,0.4907
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1024,1,0,0.5346
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,2048,1,0,0.6702
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,4096,1,0,5.8350
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,8192,1,0,12.4240
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,16384,1,0,27.2783
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,0,0.4485
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,4,1,0,0.4548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,8,1,0,0.4504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,16,1,0,0.4525
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,32,1,0,0.4589
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,64,1,0,0.4499
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,128,1,0,0.4527
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,256,1,0,0.4681
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,512,1,0,0.5084
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1024,1,0,0.5931
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,2048,1,0,0.8296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,4096,1,0,11.7318
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,8192,1,0,24.9798
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,16384,1,0,55.8831
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,0,0.4589
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,4,1,0,0.4524
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,8,1,0,0.4473
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,16,1,0,0.4582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,32,1,0,0.4591
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,64,1,0,0.4616
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,128,1,0,0.4697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,256,1,0,0.5082
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,512,1,0,0.5880
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1024,1,0,0.7776
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,2048,1,0,1.4901
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,4096,1,0,23.3210
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,8192,1,0,51.6855
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,0,0.4611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,4,1,0,0.4556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,8,1,0,0.4580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,16,1,0,0.4617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,32,1,0,0.4568
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,64,1,0,0.4632
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,128,1,0,0.5071
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,256,1,0,0.5737
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,512,1,0,0.7492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1024,1,0,1.3930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,2048,1,0,3.0019
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,4096,1,0,46.8356
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,0,0.4602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,4,1,0,0.4609
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,8,1,0,0.4580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,16,1,0,0.4598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,32,1,0,0.4697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,64,1,0,0.5092
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,128,1,0,0.5748
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,256,1,0,0.7386
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,512,1,0,1.3433
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1024,1,0,2.7618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,2048,1,0,6.1322
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,4096,1,0,162.4983
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,0,0.4592
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,4,1,0,0.4627
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,8,1,0,0.4562
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,16,1,0,0.4712
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,32,1,0,0.5151
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,64,1,0,0.5773
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,128,1,0,0.7404
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,256,1,0,1.3508
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,512,1,0,2.6556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1024,1,0,5.9477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,2048,1,0,12.7922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,0,0.4809
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,4,1,0,0.4730
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,8,1,0,0.4930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,16,1,0,0.5257
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,32,1,0,0.5906
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,64,1,0,0.7402
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,128,1,0,1.3102
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,256,1,0,2.6049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,512,1,0,5.6268
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1024,1,0,11.9955
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,0,0.5081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,4,1,0,0.5241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,8,1,0,0.5635
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,16,1,0,0.6257
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,32,1,0,0.7750
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,64,1,0,1.3243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,128,1,0,2.6065
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,256,1,0,5.4634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,512,1,0,11.3204
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,0,0.5804
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,4,1,0,0.6281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,8,1,0,0.6942
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,16,1,0,0.8490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,32,1,0,1.3810
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,64,1,0,2.5876
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,128,1,0,5.2847
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,256,1,0,11.1212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,0,0.7503
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,4,1,0,0.8418
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,8,1,0,0.9870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,16,1,0,1.5106
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,32,1,0,2.7438
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,64,1,0,5.4114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,128,1,0,11.1290
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,0,0.5791
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,4,1,0,0.5933
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,8,1,0,0.5824
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,16,1,0,0.5920
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,32,1,0,0.5927
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,64,1,0,0.5952
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,128,1,0,0.6065
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,256,1,0,0.6344
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,512,1,0,0.6827
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1024,1,0,0.8742
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,2048,1,0,1.4095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,4096,1,0,5.7910
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,8192,1,0,12.5809
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,16384,1,0,26.5537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,0,0.9262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,4,1,0,1.1020
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,8,1,0,1.1063
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,16,1,0,1.1112
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,32,1,0,1.0977
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,64,1,0,1.0578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,128,1,0,1.1126
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,256,1,0,1.2151
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,512,1,0,1.3355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1024,1,0,1.3083
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,2048,1,0,2.5819
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,4096,1,0,12.0771
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,8192,1,0,25.4073
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,16384,1,0,54.9573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,0,0.5950
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,4,1,0,0.5885
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,8,1,0,0.5932
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,16,1,0,0.5877
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,32,1,0,0.5878
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,64,1,0,0.6260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,128,1,0,0.6925
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,256,1,0,0.8557
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,512,1,0,1.2689
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1024,1,0,2.4173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,2048,1,0,5.4649
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,4096,1,0,24.4394
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,8192,1,0,52.2537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,16384,1,0,116.6008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,0,0.5851
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,4,1,0,0.5753
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,8,1,0,0.6030
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,16,1,0,0.6026
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,32,1,0,0.6268
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,64,1,0,0.6824
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,128,1,0,0.8306
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,256,1,0,1.2546
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,512,1,0,2.3558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1024,1,0,5.1582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,2048,1,0,11.7155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,4096,1,0,51.1060
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,8192,1,0,112.6966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,0,0.5981
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,4,1,0,0.6046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,8,1,0,0.6062
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,16,1,0,0.6320
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,32,1,0,0.6902
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,64,1,0,0.8502
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,128,1,0,1.2328
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,256,1,0,2.2870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,512,1,0,4.8720
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1024,1,0,10.8087
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,2048,1,0,23.0086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,4096,1,0,109.9189
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,0,0.6513
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,4,1,0,0.6634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,8,1,0,0.6790
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,16,1,0,0.7452
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,32,1,0,0.8875
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,64,1,0,1.2587
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,128,1,0,2.2433
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,256,1,0,4.8093
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,512,1,0,10.4234
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1024,1,0,20.9279
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,2048,1,0,53.0899
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,0,0.7127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,4,1,0,0.7391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,8,1,0,0.8477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,16,1,0,0.9553
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,32,1,0,1.3255
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,64,1,0,2.3163
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,128,1,0,4.7027
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,256,1,0,10.1157
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,512,1,0,20.7259
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1024,1,0,47.9463
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,2048,1,0,99.0659
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,0,0.8969
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,4,1,0,0.9714
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,8,1,0,1.1007
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,16,1,0,1.4689
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,32,1,0,2.4525
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,64,1,0,4.8774
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,128,1,0,9.9695
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,256,1,0,20.3888
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,512,1,0,46.3729
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1024,1,0,92.8835
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,0,1.2419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,4,1,0,1.3859
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,8,1,0,1.7447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,16,1,0,2.7107
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,32,1,0,5.0780
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,64,1,0,10.1827
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,128,1,0,19.6556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,256,1,0,43.5960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,512,1,0,89.4106
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,0,1.8801
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,4,1,0,2.2956
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,8,1,0,3.3899
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,16,1,0,5.8675
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,32,1,0,10.5576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,64,1,0,20.2134
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,128,1,0,43.0515
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,256,1,0,88.6737
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,0,2.9376
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,4,1,0,4.5696
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,8,1,0,7.1633
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,16,1,0,11.9609
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,32,1,0,21.5990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,64,1,0,40.7547
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,128,1,0,86.1955
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,0,0.5480
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,4,1,0,0.5547
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,8,1,0,0.5562
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,16,1,0,0.5579
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,32,1,0,0.5528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,64,1,0,0.5561
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,128,1,0,0.5488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,256,1,0,0.5645
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,512,1,0,0.5900
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1024,1,0,0.6922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,2048,1,0,0.9184
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,4096,1,0,5.1798
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,8192,1,0,10.3996
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,16384,1,0,21.9474
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,0,0.5530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,4,1,0,0.5568
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,8,1,0,0.5527
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,16,1,0,0.5613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,32,1,0,0.5492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,64,1,0,0.5495
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,128,1,0,0.5669
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,256,1,0,0.5942
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,512,1,0,0.6734
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1024,1,0,0.8559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,2048,1,0,1.3994
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,4096,1,0,10.0963
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,8192,1,0,21.2271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,16384,1,0,45.3894
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,0,0.5448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,4,1,0,0.5592
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,8,1,0,0.5520
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,16,1,0,0.5596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,32,1,0,0.5552
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,64,1,0,0.5657
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,128,1,0,0.6042
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,256,1,0,0.6745
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,512,1,0,0.8358
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1024,1,0,1.3117
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,2048,1,0,2.6640
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,4096,1,0,19.8742
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,8192,1,0,42.6652
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,16384,1,0,95.5614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,0,0.5467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,4,1,0,0.5581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,8,1,0,0.5592
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,16,1,0,0.5595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,32,1,0,0.5617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,64,1,0,0.6049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,128,1,0,0.6773
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,256,1,0,0.8226
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,512,1,0,1.2617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1024,1,0,2.5073
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,2048,1,0,5.8343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,4096,1,0,40.2545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,8192,1,0,91.4254
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,0,0.5544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,4,1,0,0.5594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,8,1,0,0.5590
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,16,1,0,0.5656
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,32,1,0,0.5929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,64,1,0,0.6780
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,128,1,0,0.8248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,256,1,0,1.2505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,512,1,0,2.3742
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1024,1,0,5.1594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,2048,1,0,11.7424
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,4096,1,0,88.1899
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,0,0.5653
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,4,1,0,0.5734
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,8,1,0,0.5860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,16,1,0,0.6211
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,32,1,0,0.6939
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,64,1,0,0.8364
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,128,1,0,1.2313
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,256,1,0,2.3009
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,512,1,0,5.0556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1024,1,0,11.0197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,2048,1,0,23.5086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,0,0.6026
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,4,1,0,0.6195
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,8,1,0,0.6551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,16,1,0,0.7202
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,32,1,0,0.8664
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,64,1,0,1.2562
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,128,1,0,2.2930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,256,1,0,4.9325
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,512,1,0,10.4983
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1024,1,0,21.4991
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,2048,1,0,51.8583
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,0,0.7050
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,4,1,0,0.7396
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,8,1,0,0.8098
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,16,1,0,0.9477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,32,1,0,1.3304
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,64,1,0,2.3685
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,128,1,0,4.8244
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,256,1,0,10.3031
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,512,1,0,21.1367
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1024,1,0,49.5990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,0,0.8718
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,4,1,0,1.0141
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,8,1,0,1.0831
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,16,1,0,1.4663
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,32,1,0,2.5250
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,64,1,0,4.9115
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,128,1,0,10.1585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,256,1,0,20.8201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,512,1,0,45.5664
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,0,1.1948
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,4,1,0,1.3788
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,8,1,0,1.7237
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,16,1,0,2.7803
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,32,1,0,5.1413
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,64,1,0,10.1685
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,128,1,0,20.4638
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,256,1,0,42.7258
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,0,1.8465
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,4,1,0,2.3047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,8,1,0,3.4288
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,16,1,0,5.9685
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,32,1,0,10.6683
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,64,1,0,20.1606
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,128,1,0,41.1004
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,0,0.5398
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,4,1,0,0.5615
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,8,1,0,0.5528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,16,1,0,0.5633
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,32,1,0,0.5657
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,64,1,0,0.5586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,128,1,0,0.5468
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,256,1,0,0.5410
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,512,1,0,0.5557
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1024,1,0,0.6028
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,2048,1,0,0.7335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,4096,1,0,4.5994
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,8192,1,0,8.8577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,16384,1,0,18.9092
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,0,0.6062
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,4,1,0,0.5718
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,8,1,0,0.5418
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,16,1,0,0.5548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,32,1,0,0.5442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,64,1,0,0.5393
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,128,1,0,0.5471
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,256,1,0,0.5662
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,512,1,0,0.5939
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1024,1,0,0.6951
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,2048,1,0,0.9165
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,4096,1,0,8.7920
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,8192,1,0,17.9032
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,16384,1,0,38.1941
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,0,0.5536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,4,1,0,0.5604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,8,1,0,0.5519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,16,1,0,0.5520
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,32,1,0,0.5579
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,64,1,0,0.5477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,128,1,0,0.5612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,256,1,0,0.6004
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,512,1,0,0.6817
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1024,1,0,0.8687
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,2048,1,0,1.4191
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,4096,1,0,17.1432
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,8192,1,0,36.2032
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,16384,1,0,82.5001
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,0,0.5385
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,4,1,0,0.5487
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,8,1,0,0.5503
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,16,1,0,0.5433
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,32,1,0,0.5507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,64,1,0,0.5573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,128,1,0,0.5993
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,256,1,0,0.6718
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,512,1,0,0.8424
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1024,1,0,1.3220
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,2048,1,0,2.9111
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,4096,1,0,34.5048
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,8192,1,0,79.3215
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,0,0.5378
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,4,1,0,0.5448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,8,1,0,0.5497
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,16,1,0,0.5489
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,32,1,0,0.5610
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,64,1,0,0.5958
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,128,1,0,0.6675
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,256,1,0,0.8250
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,512,1,0,1.2734
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1024,1,0,2.6705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,2048,1,0,6.0852
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,4096,1,0,75.5551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,0,0.5516
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,4,1,0,0.5521
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,8,1,0,0.5449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,16,1,0,0.5552
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,32,1,0,0.5965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,64,1,0,0.6781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,128,1,0,0.8222
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,256,1,0,1.2588
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,512,1,0,2.5998
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1024,1,0,5.4770
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,2048,1,0,12.2770
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,0,0.5578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,4,1,0,0.5573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,8,1,0,0.5660
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,16,1,0,0.6079
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,32,1,0,0.6900
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,64,1,0,0.8319
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,128,1,0,1.2494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,256,1,0,2.5018
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,512,1,0,5.3971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1024,1,0,11.2251
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,2048,1,0,24.4815
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,0,0.6228
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,4,1,0,0.6221
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,8,1,0,0.6540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,16,1,0,0.7246
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,32,1,0,0.8670
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,64,1,0,1.2697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,128,1,0,2.4840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,256,1,0,5.1629
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,512,1,0,10.9306
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1024,1,0,22.7391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,0,0.6945
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,4,1,0,0.7255
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,8,1,0,0.7848
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,16,1,0,0.9316
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,32,1,0,1.3295
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,64,1,0,2.5297
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,128,1,0,5.0796
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,256,1,0,10.5388
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,512,1,0,21.8116
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,0,0.8446
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,4,1,0,0.9507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,8,1,0,1.0662
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,16,1,0,1.4737
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,32,1,0,2.6627
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,64,1,0,5.1774
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,128,1,0,10.5338
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,256,1,0,21.3236
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,0,1.1075
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,4,1,0,1.3323
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,8,1,0,1.7427
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,16,1,0,2.9268
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,32,1,0,5.4265
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,64,1,0,10.5122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,128,1,0,21.2986
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,0,0.5572
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,4,1,0,0.5623
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,8,1,0,0.5544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,16,1,0,0.5717
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,32,1,0,0.5619
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,64,1,0,0.5613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,128,1,0,0.5597
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,256,1,0,0.5626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,512,1,0,0.5614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1024,1,0,0.5690
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,2048,1,0,0.6299
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,4096,1,0,4.3507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,8192,1,0,8.1643
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,16384,1,0,17.3839
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,0,0.5913
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,4,1,0,0.5843
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,8,1,0,0.5726
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,16,1,0,0.5532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,32,1,0,0.5494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,64,1,0,0.5502
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,128,1,0,0.5509
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,256,1,0,0.5548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,512,1,0,0.5667
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1024,1,0,0.6079
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,2048,1,0,0.7296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,4096,1,0,7.8173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,8192,1,0,16.3869
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,16384,1,0,35.1584
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,0,0.5541
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,4,1,0,0.5570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,8,1,0,0.5509
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,16,1,0,0.5544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,32,1,0,0.5544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,64,1,0,0.5636
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,128,1,0,0.5532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,256,1,0,0.5709
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,512,1,0,0.6125
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1024,1,0,0.6955
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,2048,1,0,0.9296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,4096,1,0,15.5352
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,8192,1,0,32.7756
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,16384,1,0,76.8295
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,0,0.5442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,4,1,0,0.5556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,8,1,0,0.5564
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,16,1,0,0.5442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,32,1,0,0.5577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,64,1,0,0.5602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,128,1,0,0.5694
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,256,1,0,0.6027
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,512,1,0,0.6776
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1024,1,0,0.8721
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,2048,1,0,1.5669
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,4096,1,0,31.4121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,8192,1,0,73.1411
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,0,0.5558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,4,1,0,0.5533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,8,1,0,0.5494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,16,1,0,0.5577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,32,1,0,0.5561
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,64,1,0,0.5647
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,128,1,0,0.6008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,256,1,0,0.6732
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,512,1,0,0.8467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1024,1,0,1.4652
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,2048,1,0,3.1485
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,4096,1,0,69.6345
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,0,0.5618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,4,1,0,0.5583
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,8,1,0,0.5550
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,16,1,0,0.5557
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,32,1,0,0.5631
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,64,1,0,0.6073
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,128,1,0,0.6647
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,256,1,0,0.8407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,512,1,0,1.4221
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1024,1,0,2.8774
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,2048,1,0,6.4597
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,0,0.5517
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,4,1,0,0.5631
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,8,1,0,0.5504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,16,1,0,0.5657
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,32,1,0,0.6072
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,64,1,0,0.6782
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,128,1,0,0.8387
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,256,1,0,1.3954
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,512,1,0,2.7974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1024,1,0,6.1122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,2048,1,0,13.1599
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,0,0.5633
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,4,1,0,0.5792
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,8,1,0,0.5887
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,16,1,0,0.6095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,32,1,0,0.6960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,64,1,0,0.8374
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,128,1,0,1.3822
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,256,1,0,2.7703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,512,1,0,5.7852
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1024,1,0,12.3055
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,0,0.6054
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,4,1,0,0.6173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,8,1,0,0.6538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,16,1,0,0.7206
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,32,1,0,0.8721
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,64,1,0,1.3985
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,128,1,0,2.6929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,256,1,0,5.6224
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,512,1,0,11.9624
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,0,0.6796
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,4,1,0,0.7331
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,8,1,0,0.7962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,16,1,0,0.9448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,32,1,0,1.4585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,64,1,0,2.7672
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,128,1,0,5.6053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,256,1,0,11.8296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,0,0.8195
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,4,1,0,0.9396
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,8,1,0,1.0760
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,16,1,0,1.5916
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,32,1,0,2.8691
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,64,1,0,5.6378
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,128,1,0,11.3536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,0,0.7928
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,4,1,0,0.7717
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,8,1,0,0.7646
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,16,1,0,0.7846
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,32,1,0,0.7684
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,64,1,0,0.7677
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,128,1,0,0.7718
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,256,1,0,0.7768
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,512,1,0,0.8045
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1024,1,0,0.8764
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,2048,1,0,1.2327
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,4096,1,0,4.2222
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,8192,1,0,8.0996
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,16384,1,0,17.9274
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,0,1.4966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,4,1,0,1.4797
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,8,1,0,1.4833
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,16,1,0,1.4570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,32,1,0,1.4991
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,64,1,0,1.4932
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,128,1,0,1.4797
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,256,1,0,1.4579
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,512,1,0,1.5816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1024,1,0,1.1325
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,2048,1,0,1.8982
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,4096,1,0,7.6648
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,8192,1,0,16.7293
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,16384,1,0,35.7842
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,0,0.7662
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,4,1,0,0.7598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,8,1,0,0.7512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,16,1,0,0.7418
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,32,1,0,0.7582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,64,1,0,0.7571
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,128,1,0,0.7803
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,256,1,0,0.8579
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,512,1,0,1.0825
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1024,1,0,1.6964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,2048,1,0,3.5998
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,4096,1,0,15.8272
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,8192,1,0,33.5539
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,16384,1,0,71.8285
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,0,0.7615
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,4,1,0,0.7548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,8,1,0,0.7537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,16,1,0,0.7617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,32,1,0,0.7726
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,64,1,0,0.7943
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,128,1,0,0.8551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,256,1,0,1.0539
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,512,1,0,1.6084
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1024,1,0,3.0988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,2048,1,0,7.4630
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,4096,1,0,32.0335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,8192,1,0,68.2770
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,0,0.7670
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,4,1,0,0.7491
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,8,1,0,0.7750
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,16,1,0,0.7705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,32,1,0,0.7985
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,64,1,0,0.8689
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,128,1,0,1.0455
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,256,1,0,1.5739
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,512,1,0,2.8647
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1024,1,0,6.4558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,2048,1,0,14.9986
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,4096,1,0,67.3570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,0,0.7735
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,4,1,0,0.7768
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,8,1,0,0.7924
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,16,1,0,0.7882
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,32,1,0,0.8777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,64,1,0,1.0684
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,128,1,0,1.5458
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,256,1,0,2.8287
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,512,1,0,6.1481
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1024,1,0,13.2067
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,2048,1,0,29.8996
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,0,0.7995
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,4,1,0,0.8375
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,8,1,0,0.8470
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,16,1,0,0.9240
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,32,1,0,1.1202
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,64,1,0,1.5795
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,128,1,0,2.7271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,256,1,0,5.8163
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,512,1,0,12.4217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1024,1,0,26.8176
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,0,0.9302
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,4,1,0,0.9721
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,8,1,0,1.0461
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,16,1,0,1.2391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,32,1,0,1.7086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,64,1,0,2.7923
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,128,1,0,5.5008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,256,1,0,12.0174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,512,1,0,24.6824
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,0,1.2347
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,4,1,0,1.3217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,8,1,0,1.5143
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,16,1,0,1.9533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,32,1,0,3.0780
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,64,1,0,5.8297
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,128,1,0,11.7675
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,256,1,0,23.9538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,0,1.7344
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,4,1,0,2.0458
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,8,1,0,2.4679
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,16,1,0,3.6241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,32,1,0,6.5167
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,64,1,0,11.9294
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,128,1,0,23.3711
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,0,2.8297
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,4,1,0,3.5982
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,8,1,0,4.8599
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,16,1,0,7.6320
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,32,1,0,13.5070
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,64,1,0,24.1551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,0,0.7713
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,4,1,0,0.7680
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,8,1,0,0.7704
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,16,1,0,0.7602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,32,1,0,0.7604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,64,1,0,0.7549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,128,1,0,0.7587
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,256,1,0,0.7490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,512,1,0,0.7561
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1024,1,0,0.8085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,2048,1,0,0.9385
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,4096,1,0,3.9107
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,8192,1,0,7.4780
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,16384,1,0,15.9219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,0,1.5492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,4,1,0,1.4817
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,8,1,0,1.4462
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,16,1,0,1.5001
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,32,1,0,1.4719
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,64,1,0,1.4719
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,128,1,0,1.4620
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,256,1,0,1.5009
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,512,1,0,1.4973
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1024,1,0,1.5754
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,2048,1,0,1.8207
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,4096,1,0,6.7223
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,8192,1,0,14.4121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,16384,1,0,30.8441
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,0,0.7545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,4,1,0,0.7590
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,8,1,0,0.7545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,16,1,0,0.7549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,32,1,0,0.7577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,64,1,0,0.7536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,128,1,0,0.7605
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,256,1,0,0.7980
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,512,1,0,0.8642
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1024,1,0,1.1317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,2048,1,0,1.9436
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,4096,1,0,13.7165
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,8192,1,0,28.8989
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,16384,1,0,63.8418
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,0,0.7623
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,4,1,0,0.7601
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,8,1,0,0.7643
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,16,1,0,0.7500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,32,1,0,0.7617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,64,1,0,0.7624
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,128,1,0,0.7989
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,256,1,0,0.8659
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,512,1,0,1.0885
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1024,1,0,1.7331
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,2048,1,0,3.7200
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,4096,1,0,27.2483
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,8192,1,0,61.0030
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,0,0.7474
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,4,1,0,0.7565
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,8,1,0,0.7596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,16,1,0,0.7514
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,32,1,0,0.7747
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,64,1,0,0.7954
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,128,1,0,0.8749
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,256,1,0,1.0658
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,512,1,0,1.6353
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1024,1,0,3.2105
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,2048,1,0,7.8517
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,4096,1,0,56.1287
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,0,0.7580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,4,1,0,0.7544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,8,1,0,0.7610
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,16,1,0,0.7689
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,32,1,0,0.8055
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,64,1,0,0.8754
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,128,1,0,1.0651
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,256,1,0,1.5958
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,512,1,0,3.1096
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1024,1,0,6.9154
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,2048,1,0,15.4840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,0,0.7438
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,4,1,0,0.7596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,8,1,0,0.7535
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,16,1,0,0.7841
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,32,1,0,0.8688
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,64,1,0,1.0663
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,128,1,0,1.5736
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,256,1,0,2.9089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,512,1,0,6.4733
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1024,1,0,13.6426
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,2048,1,0,31.0890
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,0,0.8377
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,4,1,0,0.8415
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,8,1,0,0.8592
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,16,1,0,0.9435
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,32,1,0,1.1286
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,64,1,0,1.6014
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,128,1,0,2.8218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,256,1,0,6.0875
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,512,1,0,12.9961
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1024,1,0,27.6872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,0,0.9774
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,4,1,0,0.9978
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,8,1,0,1.0652
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,16,1,0,1.2477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,32,1,0,1.7285
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,64,1,0,2.9144
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,128,1,0,5.9136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,256,1,0,12.6150
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,512,1,0,26.1005
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,0,1.2445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,4,1,0,1.3679
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,8,1,0,1.5264
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,16,1,0,1.9784
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,32,1,0,3.1692
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,64,1,0,6.0872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,128,1,0,12.3431
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,256,1,0,25.0570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,0,1.7707
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,4,1,0,2.0795
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,8,1,0,2.5163
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,16,1,0,3.6381
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,32,1,0,6.5070
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,64,1,0,12.2929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,128,1,0,24.4263
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,0,0.7729
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,4,1,0,0.7763
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,8,1,0,0.7581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,16,1,0,0.7624
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,32,1,0,0.7644
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,64,1,0,0.7729
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,128,1,0,0.7628
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,256,1,0,0.7558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,512,1,0,0.7780
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1024,1,0,0.7673
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,2048,1,0,0.7993
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,4096,1,0,3.5730
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,8192,1,0,6.4021
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,16384,1,0,13.4877
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,0,0.8215
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,4,1,0,0.8241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,8,1,0,0.8166
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,16,1,0,0.8049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,32,1,0,0.7462
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,64,1,0,0.7612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,128,1,0,0.7466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,256,1,0,0.7422
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,512,1,0,0.7632
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1024,1,0,0.8011
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,2048,1,0,0.9289
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,4096,1,0,5.9868
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,8192,1,0,12.5236
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,16384,1,0,27.2492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,0,0.7672
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,4,1,0,0.7535
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,8,1,0,0.7546
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,16,1,0,0.7551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,32,1,0,0.7575
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,64,1,0,0.7671
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,128,1,0,0.7478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,256,1,0,0.7618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,512,1,0,0.8094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1024,1,0,0.8800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,2048,1,0,1.2499
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,4096,1,0,11.8999
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,8192,1,0,25.0349
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,16384,1,0,57.5864
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,0,0.7525
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,4,1,0,0.7486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,8,1,0,0.7333
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,16,1,0,0.7411
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,32,1,0,0.7520
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,64,1,0,0.7479
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,128,1,0,0.7510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,256,1,0,0.7974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,512,1,0,0.8623
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1024,1,0,1.1416
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,2048,1,0,1.9841
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,4096,1,0,23.3379
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,8192,1,0,52.4203
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,0,0.7463
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,4,1,0,0.7429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,8,1,0,0.7445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,16,1,0,0.7495
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,32,1,0,0.7462
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,64,1,0,0.7518
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,128,1,0,0.7919
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,256,1,0,0.8559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,512,1,0,1.0917
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1024,1,0,1.7857
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,2048,1,0,3.9403
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,4096,1,0,47.1498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,0,0.7472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,4,1,0,0.7475
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,8,1,0,0.7495
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,16,1,0,0.7519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,32,1,0,0.7554
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,64,1,0,0.7896
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,128,1,0,0.8631
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,256,1,0,1.0807
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,512,1,0,1.6812
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1024,1,0,3.4892
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,2048,1,0,8.2505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,0,0.7404
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,4,1,0,0.7532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,8,1,0,0.7510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,16,1,0,0.7563
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,32,1,0,0.7946
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,64,1,0,0.8592
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,128,1,0,1.0623
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,256,1,0,1.6314
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,512,1,0,3.2305
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1024,1,0,7.3960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,2048,1,0,16.5750
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,0,0.7672
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,4,1,0,0.7634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,8,1,0,0.7666
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,16,1,0,0.7954
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,32,1,0,0.8637
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,64,1,0,1.0757
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,128,1,0,1.6059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,256,1,0,3.1171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,512,1,0,6.8873
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1024,1,0,14.8311
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,0,0.8058
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,4,1,0,0.8350
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,8,1,0,0.8650
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,16,1,0,0.9765
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,32,1,0,1.1355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,64,1,0,1.6397
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,128,1,0,3.0619
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,256,1,0,6.6385
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,512,1,0,13.8966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,0,0.9325
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,4,1,0,0.9980
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,8,1,0,1.0784
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,16,1,0,1.2627
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,32,1,0,1.7562
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,64,1,0,3.1126
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,128,1,0,6.4016
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,256,1,0,13.5890
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,0,1.2135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,4,1,0,1.3429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,8,1,0,1.5373
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,16,1,0,2.0137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,32,1,0,3.3795
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,64,1,0,6.4798
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,128,1,0,13.2927
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,0,0.7786
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,4,1,0,0.7694
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,8,1,0,0.7753
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,16,1,0,0.7714
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,32,1,0,0.7731
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,64,1,0,0.7777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,128,1,0,0.7871
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,256,1,0,0.7732
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,512,1,0,0.7614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1024,1,0,0.7549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,2048,1,0,0.7700
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,4096,1,0,3.4935
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,8192,1,0,5.9487
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,16384,1,0,12.9959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,0,1.5553
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,4,1,0,1.5404
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,8,1,0,1.5406
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,16,1,0,1.4771
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,32,1,0,1.5005
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,64,1,0,1.4882
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,128,1,0,1.4582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,256,1,0,1.4669
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,512,1,0,1.4896
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1024,1,0,0.9833
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,2048,1,0,0.7997
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,4096,1,0,5.8535
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,8192,1,0,11.4967
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,16384,1,0,25.3455
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,0,0.7481
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,4,1,0,0.7430
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,8,1,0,0.7421
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,16,1,0,0.7380
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,32,1,0,0.7418
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,64,1,0,0.7441
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,128,1,0,0.7472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,256,1,0,0.7412
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,512,1,0,0.7636
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1024,1,0,0.7945
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,2048,1,0,0.9335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,4096,1,0,10.8080
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,8192,1,0,23.0404
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,16384,1,0,53.9864
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,0,0.7558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,4,1,0,0.7440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,8,1,0,0.7480
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,16,1,0,0.7560
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,32,1,0,0.7471
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,64,1,0,0.7492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,128,1,0,0.7467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,256,1,0,0.7584
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,512,1,0,0.7960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1024,1,0,0.8783
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,2048,1,0,1.2560
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,4096,1,0,21.6973
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,8192,1,0,47.6969
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,0,0.7471
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,4,1,0,0.7349
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,8,1,0,0.7438
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,16,1,0,0.7504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,32,1,0,0.7565
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,64,1,0,0.7628
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,128,1,0,0.7565
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,256,1,0,0.7986
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,512,1,0,0.8614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1024,1,0,1.1612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,2048,1,0,2.2219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,4096,1,0,43.0611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,0,0.7368
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,4,1,0,0.7481
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,8,1,0,0.7487
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,16,1,0,0.7442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,32,1,0,0.7522
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,64,1,0,0.7598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,128,1,0,0.7969
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,256,1,0,0.8685
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,512,1,0,1.1033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1024,1,0,2.0237
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,2048,1,0,4.5212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,0,0.7428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,4,1,0,0.7420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,8,1,0,0.7496
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,16,1,0,0.7440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,32,1,0,0.7555
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,64,1,0,0.7940
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,128,1,0,0.8718
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,256,1,0,1.0736
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,512,1,0,1.9210
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1024,1,0,4.0651
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,2048,1,0,9.3363
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,0,0.7472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,4,1,0,0.7580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,8,1,0,0.7522
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,16,1,0,0.7726
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,32,1,0,0.7983
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,64,1,0,0.8670
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,128,1,0,1.0747
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,256,1,0,1.8718
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,512,1,0,3.8425
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1024,1,0,8.1943
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,0,0.7485
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,4,1,0,0.7486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,8,1,0,0.7506
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,16,1,0,0.7984
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,32,1,0,0.8598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,64,1,0,1.0851
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,128,1,0,1.8492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,256,1,0,3.6734
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,512,1,0,8.1039
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,0,0.7922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,4,1,0,0.8237
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,8,1,0,0.8607
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,16,1,0,0.9355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,32,1,0,1.1566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,64,1,0,1.8797
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,128,1,0,3.6119
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,256,1,0,7.7416
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,0,0.9489
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,4,1,0,1.0039
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,8,1,0,1.0771
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,16,1,0,1.2933
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,32,1,0,1.9981
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,64,1,0,3.6869
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,128,1,0,7.4016
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,0,0.8575
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,4,1,0,0.8755
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,8,1,0,0.8711
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,16,1,0,0.8771
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,32,1,0,0.8633
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,64,1,0,0.8422
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,128,1,0,0.8492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,256,1,0,0.8705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,512,1,0,0.8811
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1024,1,0,0.9491
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,2048,1,0,1.3200
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,4096,1,0,5.2201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,8192,1,0,10.4005
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,16384,1,0,21.5704
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,0,0.8828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,4,1,0,0.8702
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,8,1,0,0.8706
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,16,1,0,0.8598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,32,1,0,0.8670
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,64,1,0,0.8669
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,128,1,0,0.8797
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,256,1,0,0.8978
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,512,1,0,0.9525
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1024,1,0,1.2224
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,2048,1,0,2.0022
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,4096,1,0,9.9971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,8192,1,0,20.5575
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,16384,1,0,45.8903
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,0,0.8717
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,4,1,0,0.8616
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,8,1,0,0.8665
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,16,1,0,0.8569
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,32,1,0,0.8530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,64,1,0,0.8769
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,128,1,0,0.9006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,256,1,0,0.9549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,512,1,0,1.1956
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1024,1,0,1.8107
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,2048,1,0,3.5586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,4096,1,0,19.7717
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,8192,1,0,41.2854
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,16384,1,0,93.7202
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,0,0.8536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,4,1,0,0.8631
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,8,1,0,0.8553
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,16,1,0,0.8529
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,32,1,0,0.8574
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,64,1,0,0.8901
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,128,1,0,0.9385
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,256,1,0,1.1582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,512,1,0,1.7108
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1024,1,0,3.1538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,2048,1,0,7.5242
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,4096,1,0,40.1679
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,8192,1,0,89.7600
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,0,0.8478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,4,1,0,0.8450
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,8,1,0,0.8558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,16,1,0,0.8701
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,32,1,0,0.8890
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,64,1,0,0.9449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,128,1,0,1.1420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,256,1,0,1.6669
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,512,1,0,2.9708
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1024,1,0,6.6505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,2048,1,0,15.0156
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,4096,1,0,88.3113
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,0,0.8485
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,4,1,0,0.8528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,8,1,0,0.8550
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,16,1,0,0.8877
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,32,1,0,0.9606
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,64,1,0,1.1455
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,128,1,0,1.6407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,256,1,0,2.8387
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,512,1,0,5.9684
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1024,1,0,13.3407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,2048,1,0,29.9902
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,0,0.8814
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,4,1,0,0.9162
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,8,1,0,0.9344
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,16,1,0,1.0585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,32,1,0,1.2220
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,64,1,0,1.6812
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,128,1,0,2.7799
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,256,1,0,5.9092
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,512,1,0,12.5054
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1024,1,0,26.8343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,0,1.0790
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,4,1,0,1.0908
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,8,1,0,1.1484
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,16,1,0,1.3485
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,32,1,0,1.8089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,64,1,0,2.8668
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,128,1,0,5.7448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,256,1,0,12.2118
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,512,1,0,24.9148
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,0,1.3484
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,4,1,0,1.4333
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,8,1,0,1.6056
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,16,1,0,2.0709
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,32,1,0,3.1215
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,64,1,0,5.8573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,128,1,0,11.8338
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,256,1,0,24.2019
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,0,1.8675
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,4,1,0,2.1489
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,8,1,0,2.5877
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,16,1,0,3.6766
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,32,1,0,6.4800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,64,1,0,12.1661
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,128,1,0,23.6178
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,0,3.1828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,4,1,0,3.6923
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,8,1,0,4.8521
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,16,1,0,7.8056
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,32,1,0,13.5121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,64,1,0,24.3652
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,0,0.8550
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,4,1,0,0.8553
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,8,1,0,0.8582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,16,1,0,0.8574
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,32,1,0,0.8651
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,64,1,0,0.8443
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,128,1,0,0.8564
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,256,1,0,0.8537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,512,1,0,0.8596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1024,1,0,0.8829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,2048,1,0,1.0237
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,4096,1,0,5.1391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,8192,1,0,9.3288
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,16384,1,0,19.3706
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,0,0.8606
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,4,1,0,0.8691
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,8,1,0,0.8610
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,16,1,0,0.8659
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,32,1,0,0.8654
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,64,1,0,0.8586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,128,1,0,0.8728
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,256,1,0,0.8655
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,512,1,0,0.9053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1024,1,0,0.9768
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,2048,1,0,1.3455
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,4096,1,0,8.8653
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,8192,1,0,18.4687
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,16384,1,0,39.5021
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,0,0.8407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,4,1,0,0.8512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,8,1,0,0.8477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,16,1,0,0.8617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,32,1,0,0.8602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,64,1,0,0.8492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,128,1,0,0.8672
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,256,1,0,0.8916
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,512,1,0,0.9500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1024,1,0,1.2357
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,2048,1,0,2.0232
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,4096,1,0,17.6110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,8192,1,0,36.7519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,16384,1,0,83.9020
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,0,0.8571
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,4,1,0,0.8485
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,8,1,0,0.8576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,16,1,0,0.8525
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,32,1,0,0.8532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,64,1,0,0.8545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,128,1,0,0.8894
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,256,1,0,0.9582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,512,1,0,1.1825
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1024,1,0,1.8387
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,2048,1,0,3.7519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,4096,1,0,35.4665
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,8192,1,0,79.8914
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,0,0.8470
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,4,1,0,0.8534
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,8,1,0,0.8455
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,16,1,0,0.8536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,32,1,0,0.8500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,64,1,0,0.8871
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,128,1,0,0.9450
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,256,1,0,1.1542
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,512,1,0,1.7351
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1024,1,0,3.3088
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,2048,1,0,7.8203
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,4096,1,0,77.9835
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,0,0.8505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,4,1,0,0.8561
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,8,1,0,0.8559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,16,1,0,0.8503
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,32,1,0,0.8879
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,64,1,0,0.9511
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,128,1,0,1.1373
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,256,1,0,1.6877
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,512,1,0,3.0634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1024,1,0,6.8772
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,2048,1,0,15.7795
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,0,0.8464
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,4,1,0,0.8403
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,8,1,0,0.8520
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,16,1,0,0.8883
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,32,1,0,0.9644
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,64,1,0,1.1471
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,128,1,0,1.6600
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,256,1,0,2.9996
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,512,1,0,6.4139
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1024,1,0,14.0678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,2048,1,0,31.4528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,0,0.9113
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,4,1,0,0.9280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,8,1,0,0.9505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,16,1,0,1.0250
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,32,1,0,1.2220
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,64,1,0,1.6872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,128,1,0,2.9247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,256,1,0,6.2974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,512,1,0,13.2194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1024,1,0,28.1485
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,0,1.0619
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,4,1,0,1.0793
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,8,1,0,1.1668
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,16,1,0,1.3504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,32,1,0,1.8229
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,64,1,0,2.9756
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,128,1,0,6.0680
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,256,1,0,12.7860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,512,1,0,26.3253
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,0,1.3333
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,4,1,0,1.4328
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,8,1,0,1.6227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,16,1,0,2.0770
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,32,1,0,3.2459
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,64,1,0,6.0485
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,128,1,0,12.4841
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,256,1,0,25.4373
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,0,1.8846
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,4,1,0,2.1786
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,8,1,0,2.6136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,16,1,0,3.8335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,32,1,0,6.8542
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,64,1,0,12.8238
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,128,1,0,24.8281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,0,0.8515
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,4,1,0,0.8633
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,8,1,0,0.8660
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,16,1,0,0.8643
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,32,1,0,0.8679
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,64,1,0,0.8607
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,128,1,0,0.8617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,256,1,0,0.8656
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,512,1,0,0.8678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1024,1,0,0.8586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,2048,1,0,0.8794
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,4096,1,0,4.6624
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,8192,1,0,8.6576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,16384,1,0,18.0075
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,0,1.6042
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,4,1,0,1.6287
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,8,1,0,1.6004
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,16,1,0,1.6510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,32,1,0,1.6111
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,64,1,0,1.4653
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,128,1,0,1.6186
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,256,1,0,0.8598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,512,1,0,0.8536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1024,1,0,0.8903
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,2048,1,0,1.0226
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,4096,1,0,8.1321
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,8192,1,0,16.4056
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,16384,1,0,35.0832
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,0,0.8467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,4,1,0,0.8431
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,8,1,0,0.8467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,16,1,0,0.8464
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,32,1,0,0.8497
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,64,1,0,0.8394
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,128,1,0,0.8525
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,256,1,0,0.8495
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,512,1,0,0.8884
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1024,1,0,0.9715
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,2048,1,0,1.3421
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,4096,1,0,15.7695
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,8192,1,0,33.1433
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,16384,1,0,76.2805
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,0,0.8488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,4,1,0,0.8614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,8,1,0,0.8372
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,16,1,0,0.8418
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,32,1,0,0.8556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,64,1,0,0.8631
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,128,1,0,0.8600
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,256,1,0,0.8854
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,512,1,0,0.9463
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1024,1,0,1.2428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,2048,1,0,2.0553
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,4096,1,0,31.4488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,8192,1,0,72.7582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,0,0.8373
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,4,1,0,0.8597
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,8,1,0,0.8576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,16,1,0,0.8548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,32,1,0,0.8525
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,64,1,0,0.8437
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,128,1,0,0.8719
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,256,1,0,0.9526
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,512,1,0,1.1896
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1024,1,0,1.8509
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,2048,1,0,4.0512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,4096,1,0,70.1756
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,0,0.8455
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,4,1,0,0.8436
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,8,1,0,0.8600
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,16,1,0,0.8386
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,32,1,0,0.8514
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,64,1,0,0.8737
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,128,1,0,0.9703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,256,1,0,1.1768
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,512,1,0,1.7532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1024,1,0,3.6549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,2048,1,0,8.5031
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,0,0.8367
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,4,1,0,0.8618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,8,1,0,0.8463
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,16,1,0,0.8459
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,32,1,0,0.8714
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,64,1,0,0.9507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,128,1,0,1.1392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,256,1,0,1.7034
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,512,1,0,3.3807
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1024,1,0,7.4859
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,2048,1,0,17.0885
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,0,0.8462
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,4,1,0,0.8501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,8,1,0,0.8565
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,16,1,0,0.8829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,32,1,0,0.9518
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,64,1,0,1.1603
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,128,1,0,1.6861
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,256,1,0,3.2597
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,512,1,0,7.1580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1024,1,0,15.3860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,0,0.8910
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,4,1,0,0.9047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,8,1,0,0.9520
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,16,1,0,1.0343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,32,1,0,1.2245
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,64,1,0,1.7106
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,128,1,0,3.1925
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,256,1,0,6.6136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,512,1,0,14.3688
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,0,1.0364
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,4,1,0,1.1026
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,8,1,0,1.1637
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,16,1,0,1.3716
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,32,1,0,1.8337
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,64,1,0,3.2430
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,128,1,0,6.6776
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,256,1,0,14.0037
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,0,1.3069
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,4,1,0,1.4240
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,8,1,0,1.6473
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,16,1,0,2.0911
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,32,1,0,3.5629
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,64,1,0,6.7449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,128,1,0,13.6749
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,0,0.8354
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,4,1,0,0.8535
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,8,1,0,0.8442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,16,1,0,0.8475
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,32,1,0,0.8490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,64,1,0,0.8453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,128,1,0,0.8480
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,256,1,0,0.8471
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,512,1,0,0.8514
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1024,1,0,0.8510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,2048,1,0,0.8586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,4096,1,0,4.4914
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,8192,1,0,8.1868
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,16384,1,0,16.7222
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,0,0.8494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,4,1,0,0.8559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,8,1,0,0.8528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,16,1,0,0.8633
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,32,1,0,0.8548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,64,1,0,0.8538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,128,1,0,0.8500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,256,1,0,0.8531
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,512,1,0,0.8550
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1024,1,0,0.8700
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,2048,1,0,0.8961
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,4096,1,0,7.8942
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,8192,1,0,15.5151
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,16384,1,0,33.4496
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,0,0.8407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,4,1,0,0.8528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,8,1,0,0.8441
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,16,1,0,0.8406
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,32,1,0,0.8420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,64,1,0,0.8440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,128,1,0,0.8556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,256,1,0,0.8565
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,512,1,0,0.8589
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1024,1,0,0.8943
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,2048,1,0,1.0353
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,4096,1,0,14.9250
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,8192,1,0,31.0604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,16384,1,0,72.3249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,0,0.8572
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,4,1,0,0.8619
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,8,1,0,0.8578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,16,1,0,0.8576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,32,1,0,0.8612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,64,1,0,0.8558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,128,1,0,0.8643
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,256,1,0,0.8646
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,512,1,0,0.9046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1024,1,0,0.9825
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,2048,1,0,1.3634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,4096,1,0,29.7170
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,8192,1,0,68.4033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,0,0.8519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,4,1,0,0.8521
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,8,1,0,0.8472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,16,1,0,0.8585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,32,1,0,0.8554
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,64,1,0,0.8437
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,128,1,0,0.8629
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,256,1,0,0.8995
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,512,1,0,0.9780
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1024,1,0,1.2578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,2048,1,0,2.3595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,4096,1,0,65.4792
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,0,0.8632
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,4,1,0,0.8453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,8,1,0,0.8594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,16,1,0,0.8585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,32,1,0,0.8502
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,64,1,0,0.8620
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,128,1,0,0.8990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,256,1,0,0.9734
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,512,1,0,1.2119
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1024,1,0,2.1556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,2048,1,0,4.7640
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,0,0.8514
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,4,1,0,0.8439
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,8,1,0,0.8447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,16,1,0,0.8614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,32,1,0,0.8628
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,64,1,0,0.8965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,128,1,0,0.9721
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,256,1,0,1.1845
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,512,1,0,2.0585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1024,1,0,4.3060
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,2048,1,0,9.6511
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,0,0.8588
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,4,1,0,0.8575
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,8,1,0,0.8467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,16,1,0,0.8684
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,32,1,0,0.8959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,64,1,0,0.9806
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,128,1,0,1.1733
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,256,1,0,2.0130
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,512,1,0,4.0939
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1024,1,0,8.9428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,0,0.8572
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,4,1,0,0.8588
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,8,1,0,0.8637
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,16,1,0,0.8955
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,32,1,0,0.9762
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,64,1,0,1.1887
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,128,1,0,1.9841
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,256,1,0,3.9458
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,512,1,0,8.4133
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,0,0.8875
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,4,1,0,0.9251
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,8,1,0,0.9618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,16,1,0,1.0430
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,32,1,0,1.2489
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,64,1,0,2.0171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,128,1,0,3.8909
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,256,1,0,8.1662
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,0,1.0227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,4,1,0,1.1053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,8,1,0,1.1829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,16,1,0,1.3905
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,32,1,0,2.1398
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,64,1,0,3.9541
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,128,1,0,8.0159
