framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,0,0.7236
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,4,1,0,0.7398
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,8,1,0,0.7268
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,16,1,0,0.7210
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,32,1,0,0.7194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,64,1,0,0.7124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,128,1,0,0.7127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,256,1,0,0.7288
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,512,1,0,0.7286
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1024,1,0,0.7671
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,2048,1,0,0.9445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,4096,1,0,3.5997
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,8192,1,0,6.5212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,16384,1,0,13.9380
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,0,0.7405
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,4,1,0,0.7058
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,8,1,0,0.7122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,16,1,0,0.7071
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,32,1,0,0.7077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,64,1,0,0.7100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,128,1,0,0.7206
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,256,1,0,0.7151
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,512,1,0,0.7684
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1024,1,0,0.8816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,2048,1,0,1.2600
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,4096,1,0,6.1535
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,8192,1,0,12.9705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,16384,1,0,28.3564
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,0,0.7358
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,4,1,0,0.6967
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,8,1,0,0.7045
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,16,1,0,0.6944
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,32,1,0,0.7110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,64,1,0,0.7090
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,128,1,0,0.7262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,256,1,0,0.7524
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,512,1,0,0.8458
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1024,1,0,1.1450
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,2048,1,0,1.9352
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,4096,1,0,12.2205
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,8192,1,0,26.3440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,0,0.6984
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,4,1,0,0.7288
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,8,1,0,0.7219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,16,1,0,0.7258
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,32,1,0,0.7323
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,64,1,0,0.7244
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,128,1,0,0.7217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,256,1,0,0.7252
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,512,1,0,0.7326
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1024,1,0,0.7408
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,2048,1,0,0.7985
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,4096,1,0,3.6598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,8192,1,0,6.5678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,16384,1,0,13.7139
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,0,0.7399
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,4,1,0,0.7295
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,8,1,0,0.7262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,16,1,0,0.7243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,32,1,0,0.7248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,64,1,0,0.7264
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,128,1,0,0.7212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,256,1,0,0.7318
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,512,1,0,0.7382
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1024,1,0,0.7846
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,2048,1,0,0.9491
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,4096,1,0,6.2072
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,8192,1,0,12.8020
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,16384,1,0,27.5495
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,0,0.7233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,4,1,0,0.7141
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,8,1,0,0.7145
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,16,1,0,0.7280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,32,1,0,0.7156
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,64,1,0,0.7091
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,128,1,0,0.7218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,256,1,0,0.7348
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,512,1,0,0.7755
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1024,1,0,0.8717
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,2048,1,0,1.2521
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,4096,1,0,12.1108
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,8192,1,0,25.8134
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,16384,1,0,56.0361
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,0,0.7138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,4,1,0,0.7104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,8,1,0,0.7135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,16,1,0,0.7094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,32,1,0,0.6985
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,64,1,0,0.7095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,128,1,0,0.7234
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,256,1,0,0.7707
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,512,1,0,0.8475
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1024,1,0,1.1337
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,2048,1,0,1.9561
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,4096,1,0,24.4819
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,8192,1,0,62.1266
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,0,0.6982
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,4,1,0,0.7229
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,8,1,0,0.7167
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,16,1,0,0.7118
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,32,1,0,0.7124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,64,1,0,0.7175
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,128,1,0,0.7154
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,256,1,0,0.7217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,512,1,0,0.7146
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1024,1,0,0.7222
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,2048,1,0,0.7359
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,4096,1,0,3.3642
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,8192,1,0,5.9203
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,16384,1,0,12.4391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,0,0.7286
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,4,1,0,0.7285
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,8,1,0,0.7262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,16,1,0,0.7219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,32,1,0,0.7306
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,64,1,0,0.7212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,128,1,0,0.7232
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,256,1,0,0.7206
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,512,1,0,0.7285
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1024,1,0,0.7392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,2048,1,0,0.7965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,4096,1,0,5.6570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,8192,1,0,11.6704
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,16384,1,0,25.5316
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,0,0.7286
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,4,1,0,0.7230
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,8,1,0,0.7179
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,16,1,0,0.7238
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,32,1,0,0.7192
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,64,1,0,0.7087
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,128,1,0,0.7240
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,256,1,0,0.7223
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,512,1,0,0.7389
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1024,1,0,0.7672
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,2048,1,0,0.9416
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,4096,1,0,11.0410
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,8192,1,0,23.7070
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,16384,1,0,50.7602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,0,0.7341
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,4,1,0,0.7286
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,8,1,0,0.7249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,16,1,0,0.7184
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,32,1,0,0.7152
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,64,1,0,0.7242
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,128,1,0,0.7145
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,256,1,0,0.7393
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,512,1,0,0.7599
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1024,1,0,0.8829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,2048,1,0,1.2729
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,4096,1,0,21.9926
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,8192,1,0,47.5389
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,16384,1,0,107.5121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,0,0.7145
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,4,1,0,0.7151
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,8,1,0,0.7081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,16,1,0,0.7067
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,32,1,0,0.7093
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,64,1,0,0.7100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,128,1,0,0.7164
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,256,1,0,0.7574
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,512,1,0,0.8403
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1024,1,0,1.1316
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,2048,1,0,2.1662
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,4096,1,0,45.2374
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,0,0.7219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,4,1,0,0.7045
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,8,1,0,0.7100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,16,1,0,0.7116
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,32,1,0,0.7086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,64,1,0,0.8388
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,128,1,0,0.7510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,256,1,0,0.8289
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,512,1,0,1.0774
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1024,1,0,1.9397
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,2048,1,0,4.2701
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,4096,1,0,93.6122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,0,0.7083
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,4,1,0,0.7098
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,8,1,0,0.7010
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,16,1,0,0.7171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,32,1,0,0.7276
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,64,1,0,0.7587
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,128,1,0,0.8299
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,256,1,0,1.0501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,512,1,0,1.8248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1024,1,0,3.7887
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,2048,1,0,8.7453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,0,0.7142
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,4,1,0,0.7205
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,8,1,0,0.7312
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,16,1,0,0.7496
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,32,1,0,0.7794
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,64,1,0,0.8651
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,128,1,0,1.0600
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,256,1,0,1.7699
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,512,1,0,3.5280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1024,1,0,7.7973
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,0,0.8048
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,4,1,0,0.8240
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,8,1,0,0.8377
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,16,1,0,0.8770
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,32,1,0,0.9559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,64,1,0,1.1367
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,128,1,0,1.7849
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,256,1,0,3.4065
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,512,1,0,7.3152
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,0,0.9870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,4,1,0,1.0139
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,8,1,0,1.0486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,16,1,0,1.1105
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,32,1,0,1.3008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,64,1,0,1.9494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,128,1,0,3.4581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,256,1,0,7.0321
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,0,1.3403
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,4,1,0,1.3708
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,8,1,0,1.4594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,16,1,0,1.6201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,32,1,0,2.2523
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,64,1,0,3.7955
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,128,1,0,7.1211
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,0,0.7051
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,4,1,0,0.7328
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,8,1,0,0.7336
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,16,1,0,0.7304
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,32,1,0,0.7379
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,64,1,0,0.7294
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,128,1,0,0.7413
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,256,1,0,0.7337
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,512,1,0,0.7370
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1024,1,0,0.7254
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,2048,1,0,0.7418
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,4096,1,0,3.1589
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,8192,1,0,5.5262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,16384,1,0,11.5879
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,0,0.7263
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,4,1,0,0.7310
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,8,1,0,0.7272
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,16,1,0,0.7329
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,32,1,0,0.7324
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,64,1,0,0.7351
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,128,1,0,0.7290
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,256,1,0,0.7304
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,512,1,0,0.7317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1024,1,0,0.7284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,2048,1,0,0.7380
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,4096,1,0,5.1883
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,8192,1,0,10.5904
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,16384,1,0,23.4477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,0,0.7305
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,4,1,0,0.7271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,8,1,0,0.7201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,16,1,0,0.7407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,32,1,0,0.7345
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,64,1,0,0.7342
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,128,1,0,0.7324
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,256,1,0,0.7317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,512,1,0,0.7330
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1024,1,0,0.7495
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,2048,1,0,0.8175
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,4096,1,0,9.8886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,8192,1,0,21.1032
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,16384,1,0,47.7893
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,0,0.7221
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,4,1,0,0.7353
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,8,1,0,0.7272
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,16,1,0,0.7328
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,32,1,0,0.7364
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,64,1,0,0.7347
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,128,1,0,0.7316
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,256,1,0,0.7340
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,512,1,0,0.7433
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1024,1,0,0.7858
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,2048,1,0,0.9604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,4096,1,0,20.4194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,8192,1,0,43.7922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,16384,1,0,99.9308
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,0,0.7257
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,4,1,0,0.7396
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,8,1,0,0.7359
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,16,1,0,0.7276
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,32,1,0,0.7406
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,64,1,0,0.7317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,128,1,0,0.7306
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,256,1,0,0.7321
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,512,1,0,0.7731
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1024,1,0,0.8906
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,2048,1,0,1.3497
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,4096,1,0,40.8058
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,0,0.7371
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,4,1,0,0.7239
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,8,1,0,0.7310
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,16,1,0,0.7314
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,32,1,0,0.7276
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,64,1,0,0.7406
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,128,1,0,0.7369
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,256,1,0,0.7792
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,512,1,0,0.8595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1024,1,0,1.2364
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,2048,1,0,2.5288
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,4096,1,0,84.8377
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,0,0.7277
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,4,1,0,0.7215
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,8,1,0,0.7200
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,16,1,0,0.7406
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,32,1,0,0.7260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,64,1,0,0.7289
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,128,1,0,0.7722
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,256,1,0,0.8433
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,512,1,0,1.1722
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1024,1,0,2.3025
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,2048,1,0,5.1101
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,0,0.7419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,4,1,0,0.7384
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,8,1,0,0.7343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,16,1,0,0.7318
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,32,1,0,0.7392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,64,1,0,0.7780
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,128,1,0,0.8570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,256,1,0,1.1485
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,512,1,0,2.1840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1024,1,0,4.5761
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,0,0.7409
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,4,1,0,0.7272
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,8,1,0,0.7424
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,16,1,0,0.7620
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,32,1,0,0.8075
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,64,1,0,0.8893
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,128,1,0,1.1566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,256,1,0,2.1304
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,512,1,0,4.2924
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,0,0.8374
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,4,1,0,0.8378
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,8,1,0,0.8400
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,16,1,0,0.8848
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,32,1,0,0.9599
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,64,1,0,1.2315
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,128,1,0,2.1449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,256,1,0,4.2198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,0,0.9996
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,4,1,0,1.0283
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,8,1,0,1.0570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,16,1,0,1.1208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,32,1,0,1.3863
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,64,1,0,2.3049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,128,1,0,4.2318
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,0,0.7154
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,4,1,0,0.7434
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,8,1,0,0.7374
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,16,1,0,0.7277
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,32,1,0,0.7327
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,64,1,0,0.7299
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,128,1,0,0.7286
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,256,1,0,0.7363
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,512,1,0,0.7355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1024,1,0,0.7379
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,2048,1,0,0.7260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,4096,1,0,3.0700
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,8192,1,0,5.3484
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,16384,1,0,11.2220
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,0,0.7370
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,4,1,0,0.7316
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,8,1,0,0.7331
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,16,1,0,0.7310
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,32,1,0,0.7248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,64,1,0,0.7323
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,128,1,0,0.7292
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,256,1,0,0.7384
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,512,1,0,0.7421
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1024,1,0,0.7409
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,2048,1,0,0.7478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,4096,1,0,5.0163
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,8192,1,0,10.4799
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,16384,1,0,22.4957
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,0,0.7275
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,4,1,0,0.7248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,8,1,0,0.7375
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,16,1,0,0.7155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,32,1,0,0.7302
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,64,1,0,0.7224
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,128,1,0,0.7335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,256,1,0,0.7293
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,512,1,0,0.7416
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1024,1,0,0.7364
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,2048,1,0,0.7559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,4096,1,0,9.7143
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,8192,1,0,20.0494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,16384,1,0,45.8957
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,0,0.7301
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,4,1,0,0.7247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,8,1,0,0.7243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,16,1,0,0.7327
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,32,1,0,0.7275
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,64,1,0,0.7376
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,128,1,0,0.7248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,256,1,0,0.7273
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,512,1,0,0.7390
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1024,1,0,0.7573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,2048,1,0,0.8167
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,4096,1,0,18.8533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,8192,1,0,42.5938
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,16384,1,0,100.8846
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,0,0.7205
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,4,1,0,0.7180
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,8,1,0,0.7184
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,16,1,0,0.7227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,32,1,0,0.7189
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,64,1,0,0.7375
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,128,1,0,0.7370
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,256,1,0,0.7312
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,512,1,0,0.7546
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1024,1,0,0.7967
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,2048,1,0,0.9756
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,4096,1,0,39.2096
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,0,0.7208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,4,1,0,0.7171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,8,1,0,0.7257
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,16,1,0,0.7378
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,32,1,0,0.7346
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,64,1,0,0.7329
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,128,1,0,0.7402
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,256,1,0,0.7477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,512,1,0,0.7857
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1024,1,0,0.9187
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,2048,1,0,1.7228
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,4096,1,0,82.2570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,0,0.7309
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,4,1,0,0.7279
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,8,1,0,0.7381
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,16,1,0,0.7271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,32,1,0,0.7305
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,64,1,0,0.7318
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,128,1,0,0.7543
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,256,1,0,0.7833
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,512,1,0,0.8810
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1024,1,0,1.6113
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,2048,1,0,3.2787
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,0,0.7284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,4,1,0,0.7343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,8,1,0,0.7354
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,16,1,0,0.7440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,32,1,0,0.7297
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,64,1,0,0.7470
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,128,1,0,0.7905
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,256,1,0,0.8705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,512,1,0,1.5548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1024,1,0,3.0742
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,0,0.7374
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,4,1,0,0.7324
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,8,1,0,0.7357
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,16,1,0,0.7454
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,32,1,0,0.8104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,64,1,0,0.7880
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,128,1,0,0.8684
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,256,1,0,1.5344
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,512,1,0,2.9593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,0,0.7393
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,4,1,0,0.7339
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,8,1,0,0.7484
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,16,1,0,0.7684
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,32,1,0,0.8157
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,64,1,0,0.9073
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,128,1,0,1.5417
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,256,1,0,2.9173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,0,0.8270
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,4,1,0,0.8515
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,8,1,0,0.8681
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,16,1,0,0.9014
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,32,1,0,0.9919
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,64,1,0,1.6205
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,128,1,0,2.9317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,0,0.7386
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,4,1,0,0.7641
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,8,1,0,0.7316
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,16,1,0,0.7281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,32,1,0,0.7307
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,64,1,0,0.7208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,128,1,0,0.7318
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,256,1,0,0.7324
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,512,1,0,0.7593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1024,1,0,0.7771
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,2048,1,0,0.9649
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,4096,1,0,2.7323
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,8192,1,0,4.9140
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,16384,1,0,10.3540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,0,0.7547
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,4,1,0,0.7194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,8,1,0,0.7258
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,16,1,0,0.7229
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,32,1,0,0.7169
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,64,1,0,0.7193
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,128,1,0,0.7302
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,256,1,0,0.7258
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,512,1,0,0.7708
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1024,1,0,0.8876
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,2048,1,0,1.2747
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,4096,1,0,4.4664
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,8192,1,0,9.4844
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,16384,1,0,20.5900
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,0,0.7473
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,4,1,0,0.7330
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,8,1,0,0.7104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,16,1,0,0.7236
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,32,1,0,0.7187
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,64,1,0,0.7303
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,128,1,0,0.7313
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,256,1,0,0.7781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,512,1,0,0.8586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1024,1,0,1.1625
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,2048,1,0,1.9557
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,4096,1,0,8.6498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,8192,1,0,19.2695
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,16384,1,0,26.7002
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,0,0.7144
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,4,1,0,0.7230
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,8,1,0,0.7215
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,16,1,0,0.7287
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,32,1,0,0.7502
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,64,1,0,0.7475
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,128,1,0,0.7721
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,256,1,0,0.8573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,512,1,0,1.1073
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1024,1,0,1.7580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,2048,1,0,3.6468
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,4096,1,0,17.2475
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,8192,1,0,23.6526
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,0,0.7367
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,4,1,0,0.7197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,8,1,0,0.7198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,16,1,0,0.7281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,32,1,0,0.7363
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,64,1,0,0.7801
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,128,1,0,0.8499
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,256,1,0,1.0901
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,512,1,0,1.6431
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1024,1,0,3.2536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,2048,1,0,7.3674
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,4096,1,0,21.5538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,0,0.7563
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,4,1,0,0.7519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,8,1,0,0.7524
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,16,1,0,0.7627
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,32,1,0,0.8067
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,64,1,0,0.8894
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,128,1,0,1.0912
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,256,1,0,1.6182
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,512,1,0,3.1364
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1024,1,0,6.5998
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,2048,1,0,15.2136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,0,0.8498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,4,1,0,0.8463
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,8,1,0,0.8463
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,16,1,0,0.8919
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,32,1,0,0.9629
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,64,1,0,1.1699
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,128,1,0,1.6200
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,256,1,0,2.9301
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,512,1,0,6.1009
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1024,1,0,13.5576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,0,1.0030
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,4,1,0,1.0187
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,8,1,0,1.0445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,16,1,0,1.1274
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,32,1,0,1.3233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,64,1,0,1.7611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,128,1,0,2.9385
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,256,1,0,5.9441
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,512,1,0,12.6160
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,0,1.3311
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,4,1,0,1.3705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,8,1,0,1.4495
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,16,1,0,1.6231
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,32,1,0,2.0779
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,64,1,0,3.2352
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,128,1,0,6.0684
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,256,1,0,12.0397
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,0,1.9898
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,4,1,0,2.1085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,8,1,0,2.2760
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,16,1,0,2.6787
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,32,1,0,3.8679
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,64,1,0,6.6194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,128,1,0,12.4420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,0,3.3414
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,4,1,0,3.5807
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,8,1,0,3.9833
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,16,1,0,5.1093
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,32,1,0,7.8828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,64,1,0,13.7017
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,0,0.7437
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,4,1,0,0.7611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,8,1,0,0.7669
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,16,1,0,0.7594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,32,1,0,0.7745
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,64,1,0,0.7715
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,128,1,0,0.7530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,256,1,0,0.7508
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,512,1,0,0.7628
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1024,1,0,0.7711
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,2048,1,0,0.8363
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,4096,1,0,2.2875
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,8192,1,0,3.9564
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,16384,1,0,8.4223
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,0,0.7602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,4,1,0,0.7503
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,8,1,0,0.7493
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,16,1,0,0.7589
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,32,1,0,0.7483
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,64,1,0,0.7527
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,128,1,0,0.7580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,256,1,0,0.7638
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,512,1,0,0.7739
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1024,1,0,0.8046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,2048,1,0,0.9717
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,4096,1,0,3.5329
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,8192,1,0,7.5186
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,16384,1,0,16.5296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,0,0.7531
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,4,1,0,0.7585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,8,1,0,0.7632
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,16,1,0,0.7507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,32,1,0,0.7605
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,64,1,0,0.7573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,128,1,0,0.7540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,256,1,0,0.7747
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,512,1,0,0.8138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1024,1,0,0.9127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,2048,1,0,1.2813
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,4096,1,0,6.6945
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,8192,1,0,14.7160
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,16384,1,0,19.7411
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,0,0.7404
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,4,1,0,0.7488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,8,1,0,0.7390
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,16,1,0,0.7285
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,32,1,0,0.7429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,64,1,0,0.7420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,128,1,0,0.7593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,256,1,0,0.7963
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,512,1,0,0.8683
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1024,1,0,1.1652
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,2048,1,0,1.9756
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,4096,1,0,13.0734
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,8192,1,0,16.8280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,16384,1,0,45.0527
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,0,0.7484
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,4,1,0,0.7525
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,8,1,0,0.7430
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,16,1,0,0.7540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,32,1,0,0.7531
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,64,1,0,0.7603
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,128,1,0,0.8016
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,256,1,0,0.8569
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,512,1,0,1.1073
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1024,1,0,1.7695
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,2048,1,0,3.8063
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,4096,1,0,14.8271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,0,0.7466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,4,1,0,0.7524
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,8,1,0,0.7489
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,16,1,0,0.7611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,32,1,0,0.7696
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,64,1,0,0.8009
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,128,1,0,0.8607
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,256,1,0,1.0860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,512,1,0,1.6646
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1024,1,0,3.3694
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,2048,1,0,7.7766
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,4096,1,0,32.6368
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,0,0.7369
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,4,1,0,0.7512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,8,1,0,0.7569
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,16,1,0,0.7719
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,32,1,0,0.8186
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,64,1,0,0.8887
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,128,1,0,1.0833
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,256,1,0,1.6202
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,512,1,0,3.1852
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1024,1,0,6.9277
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,2048,1,0,15.7668
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,0,0.8574
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,4,1,0,0.8661
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,8,1,0,0.8584
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,16,1,0,0.8991
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,32,1,0,0.9691
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,64,1,0,1.1607
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,128,1,0,1.6300
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,256,1,0,3.0670
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,512,1,0,6.2048
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1024,1,0,13.8301
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,0,1.0477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,4,1,0,1.0396
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,8,1,0,1.0647
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,16,1,0,1.1335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,32,1,0,1.3126
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,64,1,0,1.7813
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,128,1,0,3.1030
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,256,1,0,6.1698
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,512,1,0,13.1758
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,0,1.3896
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,4,1,0,1.3948
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,8,1,0,1.4560
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,16,1,0,1.6279
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,32,1,0,2.0803
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,64,1,0,3.4075
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,128,1,0,6.2059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,256,1,0,12.5434
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,0,2.0318
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,4,1,0,2.1244
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,8,1,0,2.2860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,16,1,0,2.6960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,32,1,0,4.0248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,64,1,0,6.7993
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,128,1,0,13.1082
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,0,0.7537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,4,1,0,0.7919
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,8,1,0,0.7783
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,16,1,0,0.7730
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,32,1,0,0.7566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,64,1,0,0.7676
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,128,1,0,0.7639
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,256,1,0,0.7674
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,512,1,0,0.7787
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1024,1,0,0.7854
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,2048,1,0,0.7897
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,4096,1,0,2.0711
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,8192,1,0,3.4953
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,16384,1,0,7.4952
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,0,0.7665
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,4,1,0,0.7528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,8,1,0,0.7608
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,16,1,0,0.7596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,32,1,0,0.7571
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,64,1,0,0.7616
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,128,1,0,0.7631
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,256,1,0,0.7722
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,512,1,0,0.7633
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1024,1,0,0.7736
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,2048,1,0,0.8305
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,4096,1,0,3.0650
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,8192,1,0,6.4918
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,16384,1,0,14.5294
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,0,0.7675
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,4,1,0,0.7676
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,8,1,0,0.7624
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,16,1,0,0.7736
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,32,1,0,0.7636
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,64,1,0,0.7752
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,128,1,0,0.7735
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,256,1,0,0.7686
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,512,1,0,0.7845
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1024,1,0,0.8114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,2048,1,0,0.9883
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,4096,1,0,5.6461
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,8192,1,0,12.7450
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,16384,1,0,16.5073
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,0,0.7741
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,4,1,0,0.7685
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,8,1,0,0.7686
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,16,1,0,0.7680
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,32,1,0,0.7672
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,64,1,0,0.7678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,128,1,0,0.7744
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,256,1,0,0.7859
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,512,1,0,0.8099
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1024,1,0,0.9296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,2048,1,0,1.3078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,4096,1,0,11.0254
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,8192,1,0,13.5992
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,16384,1,0,37.6201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,0,0.7504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,4,1,0,0.7789
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,8,1,0,0.7591
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,16,1,0,0.7586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,32,1,0,0.7604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,64,1,0,0.7601
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,128,1,0,0.7749
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,256,1,0,0.8049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,512,1,0,0.8858
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1024,1,0,1.1891
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,2048,1,0,2.1974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,4096,1,0,11.5753
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,0,0.7637
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,4,1,0,0.7596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,8,1,0,0.7624
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,16,1,0,0.7582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,32,1,0,0.7679
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,64,1,0,0.7726
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,128,1,0,0.8064
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,256,1,0,0.8624
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,512,1,0,1.1292
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1024,1,0,1.9718
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,2048,1,0,4.3095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,4096,1,0,25.7148
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,0,0.7578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,4,1,0,0.7632
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,8,1,0,0.7617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,16,1,0,0.7573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,32,1,0,0.7666
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,64,1,0,0.8059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,128,1,0,0.8698
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,256,1,0,1.0990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,512,1,0,1.8518
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1024,1,0,3.8116
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,2048,1,0,8.9732
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,0,0.7602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,4,1,0,0.7708
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,8,1,0,0.7792
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,16,1,0,0.8022
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,32,1,0,0.8246
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,64,1,0,0.9080
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,128,1,0,1.1008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,256,1,0,1.8019
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,512,1,0,3.5990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1024,1,0,7.8184
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,0,0.8669
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,4,1,0,0.8668
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,8,1,0,0.8860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,16,1,0,0.9117
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,32,1,0,0.9951
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,64,1,0,1.1818
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,128,1,0,1.8198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,256,1,0,3.4374
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,512,1,0,7.4721
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,0,1.0466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,4,1,0,1.0586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,8,1,0,1.0824
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,16,1,0,1.1593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,32,1,0,1.3429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,64,1,0,1.9754
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,128,1,0,3.4834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,256,1,0,7.0466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,0,1.3795
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,4,1,0,1.4395
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,8,1,0,1.5093
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,16,1,0,1.6604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,32,1,0,2.2876
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,64,1,0,3.8444
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,128,1,0,7.1168
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,0,0.7490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,4,1,0,0.7555
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,8,1,0,0.7534
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,16,1,0,0.7648
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,32,1,0,0.7593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,64,1,0,0.7657
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,128,1,0,0.7520
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,256,1,0,0.7560
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,512,1,0,0.7517
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1024,1,0,0.7598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,2048,1,0,0.7632
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,4096,1,0,1.9436
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,8192,1,0,3.2032
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,16384,1,0,6.7990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,0,0.7645
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,4,1,0,0.7667
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,8,1,0,0.7678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,16,1,0,0.7668
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,32,1,0,0.7626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,64,1,0,0.7610
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,128,1,0,0.7612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,256,1,0,0.7648
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,512,1,0,0.7598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1024,1,0,0.7735
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,2048,1,0,0.7757
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,4096,1,0,2.7840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,8192,1,0,5.8868
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,16384,1,0,13.1495
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,0,0.7541
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,4,1,0,0.7536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,8,1,0,0.7581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,16,1,0,0.7542
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,32,1,0,0.7572
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,64,1,0,0.7520
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,128,1,0,0.7521
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,256,1,0,0.7594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,512,1,0,0.7591
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1024,1,0,0.7704
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,2048,1,0,0.8262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,4096,1,0,5.0546
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,8192,1,0,11.3470
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,16384,1,0,14.9802
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,0,0.7554
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,4,1,0,0.7519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,8,1,0,0.7595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,16,1,0,0.7399
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,32,1,0,0.7503
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,64,1,0,0.7558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,128,1,0,0.7590
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,256,1,0,0.7576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,512,1,0,0.7674
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1024,1,0,0.8000
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,2048,1,0,0.9791
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,4096,1,0,9.7047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,8192,1,0,12.0641
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,16384,1,0,34.0407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,0,0.7551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,4,1,0,0.7513
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,8,1,0,0.7569
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,16,1,0,0.7565
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,32,1,0,0.7573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,64,1,0,0.7559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,128,1,0,0.7578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,256,1,0,0.7546
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,512,1,0,0.8036
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1024,1,0,0.9082
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,2048,1,0,1.3750
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,4096,1,0,10.0382
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,0,0.7501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,4,1,0,0.7538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,8,1,0,0.7515
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,16,1,0,0.7527
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,32,1,0,0.7572
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,64,1,0,0.7599
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,128,1,0,0.7791
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,256,1,0,0.7988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,512,1,0,0.8849
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1024,1,0,1.2569
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,2048,1,0,2.5695
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,4096,1,0,22.5746
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,0,0.7517
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,4,1,0,0.7474
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,8,1,0,0.7496
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,16,1,0,0.7601
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,32,1,0,0.7598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,64,1,0,0.7734
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,128,1,0,0.8033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,256,1,0,0.8600
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,512,1,0,1.2045
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1024,1,0,2.3510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,2048,1,0,5.0708
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,0,0.7547
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,4,1,0,0.7528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,8,1,0,0.7562
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,16,1,0,0.7592
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,32,1,0,0.7665
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,64,1,0,0.7993
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,128,1,0,0.8650
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,256,1,0,1.1749
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,512,1,0,2.2255
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1024,1,0,4.5974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,0,0.7540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,4,1,0,0.7684
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,8,1,0,0.7761
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,16,1,0,0.7879
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,32,1,0,0.8212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,64,1,0,0.9150
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,128,1,0,1.1800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,256,1,0,2.1768
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,512,1,0,4.3712
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,0,0.8405
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,4,1,0,0.8701
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,8,1,0,0.8863
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,16,1,0,0.9239
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,32,1,0,0.9921
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,64,1,0,1.2618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,128,1,0,2.1921
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,256,1,0,4.2295
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,0,1.3866
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,4,1,0,1.0675
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,8,1,0,1.1018
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,16,1,0,1.1723
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,32,1,0,1.4161
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,64,1,0,2.3520
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,128,1,0,4.2981
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,0,0.7679
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,4,1,0,0.7868
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,8,1,0,0.7725
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,16,1,0,0.7792
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,32,1,0,0.7618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,64,1,0,0.7739
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,128,1,0,0.7660
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,256,1,0,0.7592
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,512,1,0,0.7664
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1024,1,0,0.7770
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,2048,1,0,0.7612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,4096,1,0,1.9055
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,8192,1,0,3.0983
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,16384,1,0,6.5709
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,0,0.7728
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,4,1,0,0.7756
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,8,1,0,0.7718
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,16,1,0,0.7700
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,32,1,0,0.7595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,64,1,0,0.7635
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,128,1,0,0.7558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,256,1,0,0.7671
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,512,1,0,0.7716
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1024,1,0,0.7659
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,2048,1,0,0.7684
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,4096,1,0,2.6753
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,8192,1,0,5.6612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,16384,1,0,12.6552
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,0,0.7672
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,4,1,0,0.7663
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,8,1,0,0.7587
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,16,1,0,0.7634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,32,1,0,0.7598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,64,1,0,0.7577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,128,1,0,0.7661
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,256,1,0,0.7717
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,512,1,0,0.7688
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1024,1,0,0.7709
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,2048,1,0,0.7864
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,4096,1,0,4.8339
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,8192,1,0,10.8722
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,16384,1,0,14.2512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,0,0.7655
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,4,1,0,0.7686
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,8,1,0,0.7594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,16,1,0,0.7617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,32,1,0,0.7695
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,64,1,0,0.7698
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,128,1,0,0.7611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,256,1,0,0.7717
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,512,1,0,0.7646
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1024,1,0,0.7880
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,2048,1,0,0.8534
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,4096,1,0,9.2451
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,8192,1,0,11.3378
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,16384,1,0,33.0006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,0,0.7828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,4,1,0,0.7673
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,8,1,0,0.7847
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,16,1,0,0.7645
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,32,1,0,0.7649
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,64,1,0,0.7624
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,128,1,0,0.7619
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,256,1,0,0.7690
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,512,1,0,0.7838
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1024,1,0,0.8272
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,2048,1,0,1.0002
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,4096,1,0,9.3066
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,0,0.7635
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,4,1,0,0.7604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,8,1,0,0.7632
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,16,1,0,0.7530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,32,1,0,0.7545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,64,1,0,0.7612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,128,1,0,0.7637
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,256,1,0,0.7799
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,512,1,0,0.8218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1024,1,0,0.9347
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,2048,1,0,1.7718
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,4096,1,0,21.2979
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,0,0.7577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,4,1,0,0.7520
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,8,1,0,0.7512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,16,1,0,0.7435
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,32,1,0,0.7542
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,64,1,0,0.7538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,128,1,0,0.7602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,256,1,0,0.8124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,512,1,0,0.9011
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1024,1,0,1.6636
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,2048,1,0,3.3664
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,0,0.7574
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,4,1,0,0.7593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,8,1,0,0.7558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,16,1,0,0.7571
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,32,1,0,0.7627
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,64,1,0,0.7733
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,128,1,0,0.8100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,256,1,0,0.8965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,512,1,0,1.6045
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1024,1,0,3.1574
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,0,0.7532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,4,1,0,0.7625
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,8,1,0,0.7618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,16,1,0,0.7643
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,32,1,0,0.7580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,64,1,0,0.8182
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,128,1,0,0.8911
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,256,1,0,1.5836
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,512,1,0,3.0409
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,0,0.7760
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,4,1,0,0.7614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,8,1,0,0.7722
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,16,1,0,0.7899
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,32,1,0,0.8302
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,64,1,0,0.9373
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,128,1,0,1.5886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,256,1,0,2.9979
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,0,0.8559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,4,1,0,0.8602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,8,1,0,0.8888
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,16,1,0,0.9391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,32,1,0,1.0355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,64,1,0,1.6681
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,128,1,0,3.0137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,0,0.7072
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,4,1,0,0.7406
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,8,1,0,0.6987
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,16,1,0,0.6943
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,32,1,0,0.7006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,64,1,0,0.7053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,128,1,0,0.6937
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,256,1,0,0.7209
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,512,1,0,0.7238
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1024,1,0,0.7448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,2048,1,0,0.9291
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,4096,1,0,3.5536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,8192,1,0,6.5290
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,16384,1,0,13.9742
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,0,0.7280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,4,1,0,0.6897
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,8,1,0,0.6901
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,16,1,0,0.6903
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,32,1,0,0.6878
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,64,1,0,0.6937
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,128,1,0,0.6951
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,256,1,0,0.7064
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,512,1,0,0.7390
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1024,1,0,0.8496
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,2048,1,0,1.2367
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,4096,1,0,6.2101
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,8192,1,0,13.1442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,16384,1,0,28.2013
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,0,0.7271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,4,1,0,0.6881
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,8,1,0,0.6870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,16,1,0,0.6936
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,32,1,0,0.6955
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,64,1,0,0.7059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,128,1,0,0.7115
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,256,1,0,0.7428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,512,1,0,0.8312
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1024,1,0,1.1405
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,2048,1,0,1.9328
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,4096,1,0,12.4432
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,8192,1,0,26.7518
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,0,0.7434
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,4,1,0,0.7608
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,8,1,0,0.7655
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,16,1,0,0.7484
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,32,1,0,0.7578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,64,1,0,0.7623
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,128,1,0,0.7459
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,256,1,0,0.7544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,512,1,0,0.7482
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1024,1,0,0.7678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,2048,1,0,0.8331
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,4096,1,0,3.6987
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,8192,1,0,6.6290
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,16384,1,0,13.7660
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,0,0.7528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,4,1,0,0.7407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,8,1,0,0.7410
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,16,1,0,0.7425
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,32,1,0,0.7355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,64,1,0,0.7290
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,128,1,0,0.7373
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,256,1,0,0.7429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,512,1,0,0.7532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1024,1,0,0.7852
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,2048,1,0,0.9456
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,4096,1,0,6.2516
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,8192,1,0,12.8440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,16384,1,0,27.8637
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,0,0.7603
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,4,1,0,0.7582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,8,1,0,0.7485
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,16,1,0,0.7504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,32,1,0,0.7472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,64,1,0,0.7544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,128,1,0,0.7452
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,256,1,0,0.7518
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,512,1,0,0.8019
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1024,1,0,0.8975
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,2048,1,0,1.2618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,4096,1,0,12.1586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,8192,1,0,26.1643
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,16384,1,0,56.4516
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,0,0.7740
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,4,1,0,0.7704
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,8,1,0,0.7705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,16,1,0,0.7617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,32,1,0,0.7441
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,64,1,0,0.7513
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,128,1,0,0.7673
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,256,1,0,0.7942
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,512,1,0,0.8651
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1024,1,0,1.1506
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,2048,1,0,1.9673
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,4096,1,0,24.2909
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,8192,1,0,53.0172
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,0,0.7750
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,4,1,0,0.8041
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,8,1,0,0.8105
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,16,1,0,0.7873
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,32,1,0,0.7861
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,64,1,0,0.7815
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,128,1,0,0.7803
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,256,1,0,0.7698
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,512,1,0,0.7638
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1024,1,0,0.7637
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,2048,1,0,0.7630
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,4096,1,0,3.4080
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,8192,1,0,5.9715
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,16384,1,0,12.4557
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,0,0.7488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,4,1,0,0.7501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,8,1,0,0.7538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,16,1,0,0.7468
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,32,1,0,0.7488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,64,1,0,0.7340
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,128,1,0,0.7429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,256,1,0,0.7461
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,512,1,0,0.7474
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1024,1,0,0.7606
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,2048,1,0,0.8173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,4096,1,0,5.7033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,8192,1,0,11.5676
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,16384,1,0,25.3254
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,0,0.7453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,4,1,0,0.7426
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,8,1,0,0.7341
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,16,1,0,0.7406
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,32,1,0,0.7361
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,64,1,0,0.7431
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,128,1,0,0.7428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,256,1,0,0.7452
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,512,1,0,0.7463
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1024,1,0,0.7860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,2048,1,0,0.9581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,4096,1,0,10.9924
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,8192,1,0,23.3399
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,16384,1,0,50.7881
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,0,0.7401
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,4,1,0,0.7419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,8,1,0,0.7356
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,16,1,0,0.7441
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,32,1,0,0.7287
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,64,1,0,0.7481
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,128,1,0,0.7483
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,256,1,0,0.7558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,512,1,0,0.7897
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1024,1,0,0.9056
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,2048,1,0,1.2920
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,4096,1,0,21.8895
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,8192,1,0,47.8095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,16384,1,0,108.3010
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,0,0.7498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,4,1,0,0.7490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,8,1,0,0.7466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,16,1,0,0.7486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,32,1,0,0.7605
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,64,1,0,0.7514
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,128,1,0,0.7674
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,256,1,0,0.8076
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,512,1,0,0.8724
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1024,1,0,1.1754
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,2048,1,0,2.1767
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,4096,1,0,45.4173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,0,0.7323
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,4,1,0,0.7369
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,8,1,0,0.7332
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,16,1,0,0.7432
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,32,1,0,0.7498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,64,1,0,0.7527
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,128,1,0,0.7736
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,256,1,0,0.8433
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,512,1,0,1.0996
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1024,1,0,1.9466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,2048,1,0,4.3548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,4096,1,0,93.3671
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,0,0.7257
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,4,1,0,0.7354
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,8,1,0,0.7429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,16,1,0,0.7475
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,32,1,0,0.7446
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,64,1,0,0.7678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,128,1,0,0.8558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,256,1,0,1.0751
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,512,1,0,1.8319
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1024,1,0,3.8019
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,2048,1,0,9.1499
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,0,0.7520
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,4,1,0,0.7483
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,8,1,0,0.7576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,16,1,0,0.7712
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,32,1,0,0.8032
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,64,1,0,0.8852
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,128,1,0,1.0751
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,256,1,0,1.7765
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,512,1,0,3.5392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1024,1,0,7.8392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,0,0.8554
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,4,1,0,0.8454
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,8,1,0,0.8526
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,16,1,0,0.8910
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,32,1,0,0.9705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,64,1,0,1.1555
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,128,1,0,1.7931
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,256,1,0,3.3986
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,512,1,0,7.3264
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,0,1.0748
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,4,1,0,1.0514
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,8,1,0,1.0549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,16,1,0,1.1234
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,32,1,0,1.3175
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,64,1,0,1.9530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,128,1,0,3.4442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,256,1,0,7.0338
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,0,1.3618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,4,1,0,1.4013
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,8,1,0,1.4766
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,16,1,0,1.6327
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,32,1,0,2.2582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,64,1,0,3.7940
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,128,1,0,7.1428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,0,0.7213
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,4,1,0,0.7362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,8,1,0,0.7378
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,16,1,0,0.7391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,32,1,0,0.7371
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,64,1,0,0.7467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,128,1,0,0.7366
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,256,1,0,0.7483
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,512,1,0,0.7349
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1024,1,0,0.7393
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,2048,1,0,0.7372
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,4096,1,0,3.1604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,8192,1,0,5.5478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,16384,1,0,11.5277
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,0,0.7380
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,4,1,0,0.7346
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,8,1,0,0.7415
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,16,1,0,0.7423
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,32,1,0,0.7290
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,64,1,0,0.7360
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,128,1,0,0.7323
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,256,1,0,0.7415
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,512,1,0,0.7369
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1024,1,0,0.7472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,2048,1,0,0.8817
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,4096,1,0,5.1901
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,8192,1,0,10.7089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,16384,1,0,23.6723
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,0,0.7283
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,4,1,0,0.7250
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,8,1,0,0.7291
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,16,1,0,0.7259
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,32,1,0,0.7313
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,64,1,0,0.7284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,128,1,0,0.7312
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,256,1,0,0.7228
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,512,1,0,0.7240
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1024,1,0,0.7351
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,2048,1,0,0.8076
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,4096,1,0,9.9020
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,8192,1,0,21.6325
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,16384,1,0,47.7356
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,0,0.7278
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,4,1,0,0.7227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,8,1,0,0.7341
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,16,1,0,0.7191
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,32,1,0,0.7326
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,64,1,0,0.7333
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,128,1,0,0.7362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,256,1,0,0.7380
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,512,1,0,0.7407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1024,1,0,0.7727
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,2048,1,0,0.9576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,4096,1,0,19.9009
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,8192,1,0,44.0975
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,16384,1,0,100.2904
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,0,0.7335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,4,1,0,0.7396
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,8,1,0,0.7375
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,16,1,0,0.7295
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,32,1,0,0.7371
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,64,1,0,0.7343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,128,1,0,0.7330
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,256,1,0,0.7457
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,512,1,0,0.7859
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1024,1,0,0.8942
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,2048,1,0,1.3485
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,4096,1,0,40.8217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,0,0.7354
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,4,1,0,0.7419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,8,1,0,0.7322
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,16,1,0,0.7355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,32,1,0,0.7377
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,64,1,0,0.7388
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,128,1,0,0.7425
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,256,1,0,0.7792
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,512,1,0,0.8640
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1024,1,0,1.2332
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,2048,1,0,2.5310
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,4096,1,0,85.1712
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,0,0.7319
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,4,1,0,0.7360
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,8,1,0,0.7381
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,16,1,0,0.7311
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,32,1,0,0.7282
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,64,1,0,0.7453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,128,1,0,0.7891
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,256,1,0,0.8555
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,512,1,0,1.1750
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1024,1,0,2.3165
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,2048,1,0,5.0857
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,0,0.7323
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,4,1,0,0.7359
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,8,1,0,0.7402
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,16,1,0,0.7486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,32,1,0,0.7483
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,64,1,0,0.7796
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,128,1,0,0.8429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,256,1,0,1.1496
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,512,1,0,2.1822
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1024,1,0,4.5196
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,0,0.7460
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,4,1,0,0.7435
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,8,1,0,0.7568
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,16,1,0,0.7660
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,32,1,0,0.8095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,64,1,0,0.8945
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,128,1,0,1.1507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,256,1,0,2.1310
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,512,1,0,4.3167
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,0,0.8537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,4,1,0,0.8453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,8,1,0,0.8544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,16,1,0,0.8897
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,32,1,0,0.9755
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,64,1,0,1.2337
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,128,1,0,2.1473
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,256,1,0,4.2002
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,0,1.0034
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,4,1,0,1.0273
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,8,1,0,1.0748
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,16,1,0,1.1419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,32,1,0,1.3875
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,64,1,0,2.3057
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,128,1,0,4.2254
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,0,0.7477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,4,1,0,0.7636
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,8,1,0,0.7571
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,16,1,0,0.7612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,32,1,0,0.7487
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,64,1,0,0.7471
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,128,1,0,0.7418
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,256,1,0,0.7416
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,512,1,0,0.7371
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1024,1,0,0.7392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,2048,1,0,0.7174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,4096,1,0,3.0557
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,8192,1,0,5.3348
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,16384,1,0,11.2359
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,0,0.7291
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,4,1,0,0.7253
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,8,1,0,0.7246
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,16,1,0,0.7238
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,32,1,0,0.7219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,64,1,0,0.7118
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,128,1,0,0.7189
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,256,1,0,0.7217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,512,1,0,0.7242
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1024,1,0,0.7250
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,2048,1,0,0.7225
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,4096,1,0,5.0033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,8192,1,0,10.2475
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,16384,1,0,22.7540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,0,0.7111
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,4,1,0,0.7089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,8,1,0,0.7139
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,16,1,0,0.7149
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,32,1,0,0.7168
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,64,1,0,0.7221
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,128,1,0,0.7214
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,256,1,0,0.7236
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,512,1,0,0.7280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1024,1,0,0.7255
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,2048,1,0,0.7538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,4096,1,0,9.5970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,8192,1,0,19.9749
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,16384,1,0,45.8459
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,0,0.7333
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,4,1,0,0.7106
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,8,1,0,0.7080
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,16,1,0,0.7097
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,32,1,0,0.7146
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,64,1,0,0.7267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,128,1,0,0.7137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,256,1,0,0.7189
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,512,1,0,0.7173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1024,1,0,0.7243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,2048,1,0,0.7999
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,4096,1,0,19.1054
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,8192,1,0,42.1661
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,16384,1,0,96.4043
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,0,0.7193
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,4,1,0,0.7228
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,8,1,0,0.7176
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,16,1,0,0.7256
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,32,1,0,0.7237
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,64,1,0,0.7205
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,128,1,0,0.7247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,256,1,0,0.7198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,512,1,0,0.7462
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1024,1,0,0.7758
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,2048,1,0,0.9721
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,4096,1,0,40.0036
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,0,0.7139
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,4,1,0,0.7189
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,8,1,0,0.7200
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,16,1,0,0.7164
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,32,1,0,0.7280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,64,1,0,0.7211
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,128,1,0,0.7134
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,256,1,0,0.7398
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,512,1,0,0.7898
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1024,1,0,0.9089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,2048,1,0,1.7235
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,4096,1,0,81.7255
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,0,0.7173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,4,1,0,0.7229
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,8,1,0,0.7279
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,16,1,0,0.7172
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,32,1,0,0.7216
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,64,1,0,0.7162
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,128,1,0,0.7370
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,256,1,0,0.7742
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,512,1,0,0.8699
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1024,1,0,1.6132
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,2048,1,0,3.2789
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,0,0.7170
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,4,1,0,0.7279
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,8,1,0,0.7271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,16,1,0,0.7295
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,32,1,0,0.7170
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,64,1,0,0.7365
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,128,1,0,0.7734
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,256,1,0,0.8625
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,512,1,0,1.5591
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1024,1,0,3.0656
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,0,0.7219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,4,1,0,0.7194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,8,1,0,0.7138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,16,1,0,0.7178
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,32,1,0,0.7347
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,64,1,0,0.7786
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,128,1,0,0.8614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,256,1,0,1.5337
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,512,1,0,2.9590
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,0,0.7275
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,4,1,0,0.7296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,8,1,0,0.7354
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,16,1,0,0.7561
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,32,1,0,0.8650
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,64,1,0,0.9044
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,128,1,0,1.5412
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,256,1,0,2.9151
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,0,0.8156
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,4,1,0,0.8223
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,8,1,0,0.8505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,16,1,0,0.8889
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,32,1,0,0.9859
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,64,1,0,1.6167
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,128,1,0,2.9299
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,0,0.7570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,4,1,0,0.7610
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,8,1,0,0.7370
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,16,1,0,0.7347
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,32,1,0,0.7348
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,64,1,0,0.7422
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,128,1,0,0.7435
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,256,1,0,0.7503
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,512,1,0,0.7579
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1024,1,0,0.7929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,2048,1,0,0.9712
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,4096,1,0,2.7268
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,8192,1,0,4.8633
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,16384,1,0,10.4067
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,0,0.7638
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,4,1,0,0.7252
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,8,1,0,0.7453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,16,1,0,0.7402
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,32,1,0,0.7331
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,64,1,0,0.7363
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,128,1,0,0.7505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,256,1,0,0.7513
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,512,1,0,0.7939
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1024,1,0,0.8995
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,2048,1,0,1.2918
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,4096,1,0,4.4240
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,8192,1,0,9.4471
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,16384,1,0,21.3702
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,0,0.7576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,4,1,0,0.8770
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,8,1,0,0.7349
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,16,1,0,0.7348
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,32,1,0,0.7245
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,64,1,0,0.7397
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,128,1,0,0.7392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,256,1,0,0.7763
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,512,1,0,0.8589
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1024,1,0,1.1699
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,2048,1,0,1.9611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,4096,1,0,8.6278
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,8192,1,0,18.9094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,16384,1,0,26.7152
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,0,0.7286
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,4,1,0,0.7342
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,8,1,0,0.7432
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,16,1,0,0.7398
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,32,1,0,0.7449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,64,1,0,0.7501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,128,1,0,0.7840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,256,1,0,0.8580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,512,1,0,1.1124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1024,1,0,1.7552
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,2048,1,0,3.6350
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,4096,1,0,17.3596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,8192,1,0,23.5938
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,0,0.7327
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,4,1,0,0.7283
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,8,1,0,0.7339
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,16,1,0,0.7401
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,32,1,0,0.7461
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,64,1,0,0.7881
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,128,1,0,0.8534
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,256,1,0,1.0915
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,512,1,0,1.6490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1024,1,0,3.2856
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,2048,1,0,7.4261
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,4096,1,0,21.6145
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,0,0.7504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,4,1,0,0.7593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,8,1,0,0.7634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,16,1,0,0.7641
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,32,1,0,0.8120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,64,1,0,0.8938
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,128,1,0,1.1010
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,256,1,0,1.6236
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,512,1,0,3.0630
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1024,1,0,6.6748
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,2048,1,0,15.0705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,0,0.8425
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,4,1,0,0.8576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,8,1,0,0.8516
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,16,1,0,0.8867
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,32,1,0,0.9686
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,64,1,0,1.1737
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,128,1,0,1.6151
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,256,1,0,2.9609
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,512,1,0,6.1142
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1024,1,0,13.4239
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,0,0.9955
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,4,1,0,1.0089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,8,1,0,1.0500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,16,1,0,1.1305
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,32,1,0,1.3165
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,64,1,0,1.7520
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,128,1,0,2.9708
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,256,1,0,5.8972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,512,1,0,12.6577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,0,1.3543
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,4,1,0,1.3739
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,8,1,0,1.4542
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,16,1,0,1.6313
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,32,1,0,2.0610
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,64,1,0,3.2634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,128,1,0,6.0537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,256,1,0,12.0664
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,0,2.0039
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,4,1,0,2.1064
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,8,1,0,2.2798
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,16,1,0,2.6701
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,32,1,0,3.8794
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,64,1,0,6.5881
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,128,1,0,12.2597
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,0,3.3333
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,4,1,0,3.5777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,8,1,0,4.0048
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,16,1,0,5.1053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,32,1,0,7.9595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,64,1,0,13.8631
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,0,0.8000
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,4,1,0,0.7902
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,8,1,0,0.7848
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,16,1,0,0.7798
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,32,1,0,0.7716
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,64,1,0,0.7763
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,128,1,0,0.7604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,256,1,0,0.7651
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,512,1,0,0.7724
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1024,1,0,0.7808
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,2048,1,0,0.8339
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,4096,1,0,2.3090
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,8192,1,0,3.9741
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,16384,1,0,8.4251
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,0,0.7783
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,4,1,0,0.7701
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,8,1,0,0.7668
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,16,1,0,0.7732
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,32,1,0,0.7684
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,64,1,0,0.7656
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,128,1,0,0.7642
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,256,1,0,0.7733
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,512,1,0,0.7835
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1024,1,0,0.8222
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,2048,1,0,0.9766
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,4096,1,0,3.5558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,8192,1,0,7.5310
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,16384,1,0,16.5476
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,0,0.7727
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,4,1,0,0.7610
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,8,1,0,0.7677
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,16,1,0,0.7653
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,32,1,0,0.7646
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,64,1,0,0.7658
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,128,1,0,0.7831
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,256,1,0,0.7909
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,512,1,0,0.8294
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1024,1,0,0.9233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,2048,1,0,1.2918
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,4096,1,0,6.6981
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,8192,1,0,14.7250
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,16384,1,0,19.7485
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,0,0.7734
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,4,1,0,0.7690
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,8,1,0,0.7629
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,16,1,0,0.7619
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,32,1,0,0.7675
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,64,1,0,0.7626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,128,1,0,0.7848
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,256,1,0,0.8230
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,512,1,0,0.8879
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1024,1,0,1.1858
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,2048,1,0,1.9788
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,4096,1,0,13.0855
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,8192,1,0,16.8285
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,16384,1,0,45.1667
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,0,0.7977
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,4,1,0,0.7935
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,8,1,0,0.7868
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,16,1,0,0.7897
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,32,1,0,0.7849
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,64,1,0,0.7886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,128,1,0,0.8251
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,256,1,0,0.8682
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,512,1,0,1.1230
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1024,1,0,1.7786
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,2048,1,0,3.8385
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,4096,1,0,14.7930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,0,0.7663
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,4,1,0,0.7717
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,8,1,0,0.7697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,16,1,0,0.7672
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,32,1,0,0.7834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,64,1,0,0.8244
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,128,1,0,0.8754
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,256,1,0,1.0992
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,512,1,0,1.6667
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1024,1,0,3.3777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,2048,1,0,7.7510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,4096,1,0,32.7119
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,0,0.7743
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,4,1,0,0.7783
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,8,1,0,0.8174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,16,1,0,0.7935
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,32,1,0,0.8297
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,64,1,0,0.9120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,128,1,0,1.1015
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,256,1,0,1.6249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,512,1,0,3.1684
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1024,1,0,6.8848
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,2048,1,0,15.7767
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,0,0.8725
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,4,1,0,0.8759
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,8,1,0,0.8890
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,16,1,0,0.9238
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,32,1,0,0.9932
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,64,1,0,1.1746
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,128,1,0,1.6358
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,256,1,0,3.0663
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,512,1,0,6.3683
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1024,1,0,14.1083
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,0,1.0630
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,4,1,0,1.0566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,8,1,0,1.0987
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,16,1,0,1.1477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,32,1,0,1.3364
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,64,1,0,1.7840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,128,1,0,3.1192
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,256,1,0,6.2178
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,512,1,0,13.2432
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,0,1.4008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,4,1,0,1.4231
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,8,1,0,1.4801
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,16,1,0,1.6399
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,32,1,0,2.0858
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,64,1,0,3.4034
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,128,1,0,6.3274
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,256,1,0,12.6360
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,0,2.0918
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,4,1,0,2.1462
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,8,1,0,2.2946
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,16,1,0,2.7019
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,32,1,0,3.9942
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,64,1,0,6.9132
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,128,1,0,12.9652
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,0,0.7444
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,4,1,0,0.7795
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,8,1,0,0.7685
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,16,1,0,0.7538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,32,1,0,0.7560
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,64,1,0,0.7584
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,128,1,0,0.7595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,256,1,0,0.7576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,512,1,0,0.7650
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1024,1,0,0.7683
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,2048,1,0,0.7777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,4096,1,0,2.0483
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,8192,1,0,3.4797
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,16384,1,0,7.4063
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,0,0.7626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,4,1,0,0.7544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,8,1,0,0.7587
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,16,1,0,0.7538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,32,1,0,0.7580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,64,1,0,0.7554
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,128,1,0,0.7710
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,256,1,0,0.7566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,512,1,0,0.7569
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1024,1,0,0.7740
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,2048,1,0,0.8337
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,4096,1,0,3.0561
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,8192,1,0,6.4828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,16384,1,0,14.5183
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,0,0.7575
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,4,1,0,0.7418
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,8,1,0,0.7547
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,16,1,0,0.7559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,32,1,0,0.7555
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,64,1,0,0.7594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,128,1,0,0.7568
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,256,1,0,0.7646
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,512,1,0,0.7664
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1024,1,0,0.8067
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,2048,1,0,0.9871
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,4096,1,0,5.6339
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,8192,1,0,12.7124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,16384,1,0,16.4934
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,0,0.7571
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,4,1,0,0.7498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,8,1,0,0.7671
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,16,1,0,0.7511
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,32,1,0,0.7437
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,64,1,0,0.7613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,128,1,0,0.7547
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,256,1,0,0.7716
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,512,1,0,0.8137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1024,1,0,0.9185
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,2048,1,0,1.3023
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,4096,1,0,11.0262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,8192,1,0,13.5884
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,16384,1,0,37.5870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,0,0.7529
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,4,1,0,0.7584
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,8,1,0,0.7562
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,16,1,0,0.7548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,32,1,0,0.7567
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,64,1,0,0.7676
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,128,1,0,0.7668
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,256,1,0,0.8111
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,512,1,0,0.8820
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1024,1,0,1.1800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,2048,1,0,2.1984
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,4096,1,0,11.5770
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,0,0.7583
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,4,1,0,0.7535
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,8,1,0,0.7542
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,16,1,0,0.7623
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,32,1,0,0.7548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,64,1,0,0.7732
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,128,1,0,0.8094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,256,1,0,0.8730
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,512,1,0,1.1224
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1024,1,0,1.9742
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,2048,1,0,4.3370
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,4096,1,0,25.7155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,0,0.7624
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,4,1,0,0.7531
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,8,1,0,0.7602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,16,1,0,0.7596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,32,1,0,0.7740
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,64,1,0,0.7935
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,128,1,0,0.8676
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,256,1,0,1.0967
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,512,1,0,1.8598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1024,1,0,3.8597
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,2048,1,0,8.9506
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,0,0.7530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,4,1,0,0.7572
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,8,1,0,0.7603
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,16,1,0,0.7879
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,32,1,0,0.8233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,64,1,0,0.9099
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,128,1,0,1.1022
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,256,1,0,1.8020
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,512,1,0,3.5924
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1024,1,0,8.0074
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,0,0.8772
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,4,1,0,0.8724
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,8,1,0,0.8774
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,16,1,0,0.9098
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,32,1,0,0.9883
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,64,1,0,1.1809
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,128,1,0,1.8178
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,256,1,0,3.4519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,512,1,0,7.2813
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,0,1.0396
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,4,1,0,1.0620
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,8,1,0,1.0894
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,16,1,0,1.1675
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,32,1,0,1.3438
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,64,1,0,1.9834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,128,1,0,3.5104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,256,1,0,7.0274
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,0,1.3816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,4,1,0,1.4390
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,8,1,0,1.5081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,16,1,0,1.6536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,32,1,0,2.2870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,64,1,0,3.8026
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,128,1,0,7.1820
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,0,0.7509
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,4,1,0,0.7745
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,8,1,0,0.7753
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,16,1,0,0.7696
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,32,1,0,0.7645
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,64,1,0,0.7765
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,128,1,0,0.7781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,256,1,0,0.7641
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,512,1,0,0.7740
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1024,1,0,0.7800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,2048,1,0,0.7803
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,4096,1,0,1.9581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,8192,1,0,3.2178
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,16384,1,0,6.8026
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,0,0.7795
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,4,1,0,0.7837
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,8,1,0,0.7717
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,16,1,0,0.7695
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,32,1,0,0.7774
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,64,1,0,0.7725
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,128,1,0,0.7699
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,256,1,0,0.7769
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,512,1,0,0.7762
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1024,1,0,0.7758
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,2048,1,0,0.7843
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,4096,1,0,2.7917
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,8192,1,0,5.8946
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,16384,1,0,13.1307
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,0,0.7592
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,4,1,0,0.7762
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,8,1,0,0.7738
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,16,1,0,0.7697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,32,1,0,0.7667
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,64,1,0,0.9389
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,128,1,0,0.7648
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,256,1,0,0.7646
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,512,1,0,0.7744
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1024,1,0,0.7863
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,2048,1,0,0.8474
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,4096,1,0,5.0646
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,8192,1,0,11.3534
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,16384,1,0,14.9723
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,0,0.7712
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,4,1,0,0.7656
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,8,1,0,0.7642
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,16,1,0,0.7721
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,32,1,0,0.7726
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,64,1,0,0.7730
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,128,1,0,0.7698
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,256,1,0,0.7771
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,512,1,0,0.7828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1024,1,0,0.8234
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,2048,1,0,0.9999
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,4096,1,0,9.7120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,8192,1,0,12.0632
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,16384,1,0,34.0404
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,0,0.7552
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,4,1,0,0.7624
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,8,1,0,0.7612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,16,1,0,0.7617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,32,1,0,0.7780
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,64,1,0,0.7688
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,128,1,0,0.7666
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,256,1,0,0.7805
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,512,1,0,0.8059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1024,1,0,0.9233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,2048,1,0,1.3789
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,4096,1,0,10.0421
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,0,0.7579
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,4,1,0,0.7676
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,8,1,0,0.7637
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,16,1,0,0.7698
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,32,1,0,0.7656
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,64,1,0,0.7730
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,128,1,0,0.7784
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,256,1,0,0.8094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,512,1,0,0.8963
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1024,1,0,1.2622
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,2048,1,0,2.5741
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,4096,1,0,22.5709
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,0,0.7604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,4,1,0,0.7665
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,8,1,0,0.7627
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,16,1,0,0.7629
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,32,1,0,0.7626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,64,1,0,0.7683
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,128,1,0,0.8066
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,256,1,0,0.8813
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,512,1,0,1.2001
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1024,1,0,2.3455
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,2048,1,0,5.1970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,0,0.7687
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,4,1,0,0.7650
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,8,1,0,0.7720
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,16,1,0,0.7728
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,32,1,0,0.7769
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,64,1,0,0.8121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,128,1,0,0.8843
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,256,1,0,1.1754
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,512,1,0,2.2282
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1024,1,0,4.6623
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,0,0.7787
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,4,1,0,0.7717
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,8,1,0,0.7790
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,16,1,0,0.7923
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,32,1,0,0.8339
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,64,1,0,0.9186
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,128,1,0,1.1790
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,256,1,0,2.1766
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,512,1,0,4.3398
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,0,0.8596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,4,1,0,0.8808
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,8,1,0,0.8781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,16,1,0,0.9223
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,32,1,0,1.0234
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,64,1,0,1.2582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,128,1,0,2.1925
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,256,1,0,4.2634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,0,1.0399
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,4,1,0,1.0709
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,8,1,0,1.1028
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,16,1,0,1.1696
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,32,1,0,1.4139
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,64,1,0,2.3529
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,128,1,0,4.2996
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,0,0.7360
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,4,1,0,0.7506
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,8,1,0,0.7469
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,16,1,0,0.7442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,32,1,0,0.7452
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,64,1,0,0.7395
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,128,1,0,0.7476
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,256,1,0,0.7400
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,512,1,0,0.7450
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1024,1,0,0.7484
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,2048,1,0,0.7410
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,4096,1,0,1.8692
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,8192,1,0,3.0668
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,16384,1,0,6.5638
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,0,0.7490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,4,1,0,0.7456
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,8,1,0,0.7482
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,16,1,0,0.7501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,32,1,0,0.7407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,64,1,0,0.7427
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,128,1,0,0.7411
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,256,1,0,0.7506
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,512,1,0,0.7439
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1024,1,0,0.7506
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,2048,1,0,0.7493
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,4096,1,0,2.6424
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,8192,1,0,5.6484
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,16384,1,0,12.6374
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,0,0.7439
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,4,1,0,0.7607
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,8,1,0,0.7387
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,16,1,0,0.7396
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,32,1,0,0.7439
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,64,1,0,0.7413
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,128,1,0,0.7494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,256,1,0,0.7459
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,512,1,0,0.7500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1024,1,0,0.7550
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,2048,1,0,0.7655
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,4096,1,0,4.8236
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,8192,1,0,10.8589
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,16384,1,0,14.2474
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,0,0.7424
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,4,1,0,0.7391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,8,1,0,0.7389
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,16,1,0,0.7447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,32,1,0,0.7450
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,64,1,0,0.7491
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,128,1,0,0.7366
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,256,1,0,0.7400
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,512,1,0,0.7445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1024,1,0,0.7625
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,2048,1,0,0.8383
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,4096,1,0,9.2292
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,8192,1,0,11.3328
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,16384,1,0,35.0301
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,0,0.7309
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,4,1,0,0.7202
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,8,1,0,0.7239
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,16,1,0,0.7296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,32,1,0,0.7399
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,64,1,0,0.7372
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,128,1,0,0.7365
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,256,1,0,0.7401
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,512,1,0,0.7486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1024,1,0,0.7954
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,2048,1,0,0.9832
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,4096,1,0,9.3059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,0,0.7244
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,4,1,0,0.7290
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,8,1,0,0.7322
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,16,1,0,0.7428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,32,1,0,0.7320
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,64,1,0,0.7386
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,128,1,0,0.7525
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,256,1,0,0.7462
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,512,1,0,0.7929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1024,1,0,0.9256
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,2048,1,0,1.7707
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,4096,1,0,21.2747
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,0,0.7303
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,4,1,0,0.7302
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,8,1,0,0.7245
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,16,1,0,0.7242
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,32,1,0,0.7365
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,64,1,0,0.7335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,128,1,0,0.7474
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,256,1,0,0.7907
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,512,1,0,0.8935
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1024,1,0,1.6579
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,2048,1,0,3.3586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,0,0.7230
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,4,1,0,0.7390
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,8,1,0,0.7396
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,16,1,0,0.7723
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,32,1,0,0.7345
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,64,1,0,0.7469
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,128,1,0,0.7949
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,256,1,0,0.8829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,512,1,0,1.6028
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1024,1,0,3.1476
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,0,0.7380
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,4,1,0,0.7376
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,8,1,0,0.7402
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,16,1,0,0.7371
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,32,1,0,0.7488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,64,1,0,0.7875
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,128,1,0,0.8904
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,256,1,0,1.5796
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,512,1,0,3.0394
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,0,0.7521
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,4,1,0,0.7549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,8,1,0,0.7586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,16,1,0,0.7824
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,32,1,0,0.8289
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,64,1,0,0.9242
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,128,1,0,1.5876
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,256,1,0,2.9954
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,0,0.8435
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,4,1,0,0.8613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,8,1,0,0.8673
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,16,1,0,0.9191
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,32,1,0,1.0101
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,64,1,0,1.6644
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,128,1,0,3.0126
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,0,0.5375
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,4,1,0,0.5440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,8,1,0,0.5420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,16,1,0,0.5456
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,32,1,0,0.5361
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,64,1,0,0.5427
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,128,1,0,0.5566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,256,1,0,0.6227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,512,1,0,0.7640
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1024,1,0,1.0984
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,2048,1,0,1.7586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,4096,1,0,3.8419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,8192,1,0,7.3817
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,16384,1,0,15.6631
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,0,0.5497
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,4,1,0,0.5429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,8,1,0,0.5415
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,16,1,0,0.5507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,32,1,0,0.5512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,64,1,0,0.5571
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,128,1,0,0.6161
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,256,1,0,0.7632
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,512,1,0,1.0870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1024,1,0,1.7363
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,2048,1,0,3.1071
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,4096,1,0,7.1449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,8192,1,0,15.0970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,16384,1,0,31.7329
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,0,0.5619
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,4,1,0,0.5667
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,8,1,0,0.5588
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,16,1,0,0.5623
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,32,1,0,0.5619
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,64,1,0,0.6208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,128,1,0,0.7611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,256,1,0,1.0826
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,512,1,0,1.7314
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1024,1,0,3.0767
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,2048,1,0,6.2703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,4096,1,0,14.3357
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,8192,1,0,30.7843
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,16384,1,0,63.9853
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,0,0.5501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,4,1,0,0.5530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,8,1,0,0.5465
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,16,1,0,0.5547
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,32,1,0,0.6225
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,64,1,0,0.7676
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,128,1,0,1.0798
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,256,1,0,1.7417
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,512,1,0,3.0705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1024,1,0,6.1724
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,2048,1,0,12.6871
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,4096,1,0,28.6538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,8192,1,0,60.4297
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,0,0.5207
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,4,1,0,0.5346
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,8,1,0,0.5306
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,16,1,0,0.5274
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,32,1,0,0.5288
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,64,1,0,0.5260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,128,1,0,0.5306
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,256,1,0,0.5904
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,512,1,0,0.7054
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1024,1,0,0.9675
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,2048,1,0,1.4987
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,4096,1,0,3.2703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,8192,1,0,6.0104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,16384,1,0,12.5932
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,0,0.5405
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,4,1,0,0.5337
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,8,1,0,0.5276
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,16,1,0,0.5373
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,32,1,0,0.5344
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,64,1,0,0.5317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,128,1,0,0.5952
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,256,1,0,0.7090
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,512,1,0,0.9765
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1024,1,0,1.4937
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,2048,1,0,2.5798
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,4096,1,0,5.6525
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,8192,1,0,12.3329
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,16384,1,0,26.5495
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,0,0.5365
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,4,1,0,0.5341
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,8,1,0,0.5345
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,16,1,0,0.5226
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,32,1,0,0.5342
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,64,1,0,0.5943
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,128,1,0,0.7074
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,256,1,0,0.9691
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,512,1,0,1.4848
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1024,1,0,2.5459
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,2048,1,0,4.8674
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,4096,1,0,11.4918
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,8192,1,0,25.0438
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,16384,1,0,52.2626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,0,0.5368
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,4,1,0,0.5663
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,8,1,0,0.5281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,16,1,0,0.5339
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,32,1,0,0.5962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,64,1,0,0.7135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,128,1,0,0.9708
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,256,1,0,1.4823
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,512,1,0,2.5281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1024,1,0,4.7613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,2048,1,0,9.8964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,4096,1,0,23.4523
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,8192,1,0,49.2938
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,16384,1,0,111.7162
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,0,0.5408
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,4,1,0,0.5394
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,8,1,0,0.5374
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,16,1,0,0.5990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,32,1,0,0.7119
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,64,1,0,0.9756
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,128,1,0,1.4903
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,256,1,0,2.5258
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,512,1,0,4.6757
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1024,1,0,9.4605
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,2048,1,0,20.6160
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,4096,1,0,46.9524
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,0,0.5413
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,4,1,0,0.5401
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,8,1,0,0.6009
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,16,1,0,0.7132
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,32,1,0,0.9744
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,64,1,0,1.4851
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,128,1,0,2.5172
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,256,1,0,4.6701
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,512,1,0,9.4211
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1024,1,0,19.3827
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,2048,1,0,40.0552
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,4096,1,0,99.2260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,0,0.5320
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,4,1,0,0.6002
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,8,1,0,0.7189
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,16,1,0,0.9825
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,32,1,0,1.4783
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,64,1,0,2.5137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,128,1,0,4.6540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,256,1,0,9.2273
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,512,1,0,18.7285
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1024,1,0,38.5121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,2048,1,0,82.4478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,0,0.5331
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,4,1,0,0.7181
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,8,1,0,0.9716
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,16,1,0,1.4784
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,32,1,0,2.5051
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,64,1,0,4.6568
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,128,1,0,9.1317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,256,1,0,18.4094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,512,1,0,39.4332
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1024,1,0,80.2982
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,0,0.6015
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,4,1,0,0.9890
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,8,1,0,1.4758
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,16,1,0,2.5150
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,32,1,0,4.6555
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,64,1,0,9.1393
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,128,1,0,18.1292
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,256,1,0,37.4374
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,512,1,0,77.8970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,0,0.7180
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,4,1,0,1.4828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,8,1,0,2.5147
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,16,1,0,4.6600
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,32,1,0,9.1353
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,64,1,0,18.1349
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,128,1,0,36.4654
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,256,1,0,76.6611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,0,0.9694
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,4,1,0,2.5196
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,8,1,0,4.6663
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,16,1,0,9.1494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,32,1,0,18.1343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,64,1,0,36.6535
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,128,1,0,77.7816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,0,0.5235
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,4,1,0,0.5405
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,8,1,0,0.5313
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,16,1,0,0.5314
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,32,1,0,0.5231
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,64,1,0,0.5317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,128,1,0,0.5316
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,256,1,0,0.5773
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,512,1,0,0.6776
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1024,1,0,0.9239
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,2048,1,0,1.3819
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,4096,1,0,3.0178
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,8192,1,0,5.3721
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,16384,1,0,11.3249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,0,0.5264
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,4,1,0,0.5265
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,8,1,0,0.5196
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,16,1,0,0.5309
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,32,1,0,0.5260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,64,1,0,0.5240
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,128,1,0,0.5765
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,256,1,0,0.6832
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,512,1,0,0.9118
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1024,1,0,1.3643
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,2048,1,0,2.3078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,4096,1,0,5.1157
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,8192,1,0,10.7079
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,16384,1,0,23.0061
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,0,0.5279
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,4,1,0,0.5307
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,8,1,0,0.5317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,16,1,0,0.5322
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,32,1,0,0.5282
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,64,1,0,0.5802
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,128,1,0,0.6771
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,256,1,0,0.9197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,512,1,0,1.3744
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1024,1,0,2.2905
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,2048,1,0,4.1399
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,4096,1,0,9.7840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,8192,1,0,21.2112
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,16384,1,0,46.0815
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,0,0.5394
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,4,1,0,0.5609
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,8,1,0,0.5394
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,16,1,0,0.5390
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,32,1,0,0.5854
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,64,1,0,0.6850
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,128,1,0,0.9203
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,256,1,0,1.3585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,512,1,0,2.2850
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1024,1,0,4.0975
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,2048,1,0,8.6036
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,4096,1,0,19.7044
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,8192,1,0,43.3939
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,16384,1,0,98.3233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,0,0.5386
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,4,1,0,0.5391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,8,1,0,0.5371
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,16,1,0,0.5944
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,32,1,0,0.6943
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,64,1,0,0.9241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,128,1,0,1.3570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,256,1,0,2.2738
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,512,1,0,4.0753
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1024,1,0,8.0965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,2048,1,0,17.2816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,4096,1,0,40.8705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,0,0.5299
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,4,1,0,0.5355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,8,1,0,0.5855
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,16,1,0,0.6875
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,32,1,0,0.9268
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,64,1,0,1.3538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,128,1,0,2.2609
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,256,1,0,4.0692
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,512,1,0,7.9075
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1024,1,0,16.5011
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,2048,1,0,35.4157
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,4096,1,0,85.1797
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,0,0.5364
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,4,1,0,0.5858
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,8,1,0,0.6894
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,16,1,0,0.9263
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,32,1,0,1.3570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,64,1,0,2.2654
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,128,1,0,4.0649
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,256,1,0,7.8632
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,512,1,0,15.7776
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1024,1,0,33.2796
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,2048,1,0,70.6499
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,0,0.5327
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,4,1,0,0.6951
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,8,1,0,0.9182
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,16,1,0,1.3606
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,32,1,0,2.2656
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,64,1,0,4.0584
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,128,1,0,7.8494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,256,1,0,15.5660
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,512,1,0,32.0068
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1024,1,0,67.0425
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,0,0.5845
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,4,1,0,0.9177
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,8,1,0,1.3519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,16,1,0,2.2660
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,32,1,0,4.0571
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,64,1,0,7.8513
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,128,1,0,15.5427
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,256,1,0,31.1209
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,512,1,0,64.4173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,0,0.6848
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,4,1,0,1.3595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,8,1,0,2.2633
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,16,1,0,4.0487
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,32,1,0,7.8476
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,64,1,0,15.5406
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,128,1,0,30.9859
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,256,1,0,62.7855
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,0,0.9169
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,4,1,0,2.2578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,8,1,0,4.0612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,16,1,0,7.8442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,32,1,0,15.5383
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,64,1,0,31.1020
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,128,1,0,62.4627
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,0,0.5446
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,4,1,0,0.5617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,8,1,0,0.5515
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,16,1,0,0.5453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,32,1,0,0.5448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,64,1,0,0.5466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,128,1,0,0.5481
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,256,1,0,0.5880
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,512,1,0,0.6678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1024,1,0,0.9128
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,2048,1,0,1.3419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,4096,1,0,2.9027
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,8192,1,0,5.0724
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,16384,1,0,10.7376
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,0,0.5452
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,4,1,0,0.5417
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,8,1,0,0.5457
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,16,1,0,0.5452
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,32,1,0,0.5390
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,64,1,0,0.5431
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,128,1,0,0.5830
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,256,1,0,0.6637
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,512,1,0,0.9000
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1024,1,0,1.3247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,2048,1,0,2.1968
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,4096,1,0,4.7989
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,8192,1,0,9.8136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,16384,1,0,21.6401
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,0,0.5489
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,4,1,0,0.5420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,8,1,0,0.5414
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,16,1,0,0.5410
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,32,1,0,0.5454
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,64,1,0,0.5800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,128,1,0,0.6688
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,256,1,0,0.9043
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,512,1,0,1.3385
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1024,1,0,2.1984
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,2048,1,0,3.9379
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,4096,1,0,9.4951
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,8192,1,0,19.9583
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,16384,1,0,43.3781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,0,0.5347
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,4,1,0,0.5802
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,8,1,0,0.5414
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,16,1,0,0.5479
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,32,1,0,0.5777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,64,1,0,0.6712
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,128,1,0,0.8960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,256,1,0,1.3262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,512,1,0,2.1626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1024,1,0,3.8682
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,2048,1,0,7.5420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,4096,1,0,19.4158
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,8192,1,0,40.3811
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,16384,1,0,91.9878
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,0,0.5404
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,4,1,0,0.5516
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,8,1,0,0.5455
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,16,1,0,0.5830
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,32,1,0,0.6725
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,64,1,0,0.8892
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,128,1,0,1.3212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,256,1,0,2.1585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,512,1,0,3.8512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1024,1,0,7.3963
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,2048,1,0,15.8558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,4096,1,0,38.4868
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,0,0.5573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,4,1,0,0.5486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,8,1,0,0.5813
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,16,1,0,0.6730
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,32,1,0,0.9069
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,64,1,0,1.3129
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,128,1,0,2.1472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,256,1,0,3.8473
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,512,1,0,7.2718
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1024,1,0,14.8540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,2048,1,0,32.3377
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,4096,1,0,80.0929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,0,0.5417
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,4,1,0,0.5992
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,8,1,0,0.6789
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,16,1,0,0.8970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,32,1,0,1.3106
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,64,1,0,2.1545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,128,1,0,3.8333
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,256,1,0,7.2594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,512,1,0,14.3824
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1024,1,0,30.4536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,2048,1,0,64.2094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,0,0.5422
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,4,1,0,0.6772
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,8,1,0,0.9023
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,16,1,0,1.3198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,32,1,0,2.1452
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,64,1,0,3.8216
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,128,1,0,7.2396
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,256,1,0,14.3402
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,512,1,0,29.5243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1024,1,0,60.4309
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,0,0.5808
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,4,1,0,0.8965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,8,1,0,1.3099
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,16,1,0,2.1430
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,32,1,0,3.8194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,64,1,0,7.2430
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,128,1,0,14.3097
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,256,1,0,28.6198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,512,1,0,58.3677
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,0,0.6787
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,4,1,0,1.3141
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,8,1,0,2.1500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,16,1,0,3.8197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,32,1,0,7.2302
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,64,1,0,14.3066
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,128,1,0,28.5652
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,256,1,0,57.3989
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,0,0.8954
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,4,1,0,2.1533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,8,1,0,3.8260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,16,1,0,7.2344
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,32,1,0,14.2950
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,64,1,0,28.5608
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,128,1,0,56.8743
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,0,0.4921
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,4,1,0,0.5206
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,8,1,0,0.5171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,16,1,0,0.5249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,32,1,0,0.5232
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,64,1,0,0.5216
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,128,1,0,0.5206
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,256,1,0,0.5342
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,512,1,0,0.5567
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1024,1,0,0.6582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,2048,1,0,0.9722
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,4096,1,0,2.3761
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,8192,1,0,4.4215
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,16384,1,0,23.5926
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,0,0.5174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,4,1,0,0.5168
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,8,1,0,0.5121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,16,1,0,0.5248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,32,1,0,0.5170
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,64,1,0,0.5153
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,128,1,0,0.5324
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,256,1,0,0.5516
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,512,1,0,0.6351
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1024,1,0,0.8717
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,2048,1,0,1.5779
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,4096,1,0,4.0138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,8192,1,0,8.9339
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,16384,1,0,19.9084
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,0,0.5235
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,4,1,0,0.5217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,8,1,0,0.5157
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,16,1,0,0.5267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,32,1,0,0.5278
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,64,1,0,0.5510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,128,1,0,0.5632
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,256,1,0,0.6362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,512,1,0,0.8353
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1024,1,0,1.3833
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,2048,1,0,3.0756
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,4096,1,0,8.1865
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,8192,1,0,17.7429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,16384,1,0,23.7383
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,0,0.5189
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,4,1,0,0.5110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,8,1,0,0.5165
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,16,1,0,0.5160
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,32,1,0,0.5361
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,64,1,0,0.5590
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,128,1,0,0.6164
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,256,1,0,0.8016
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,512,1,0,1.2836
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1024,1,0,2.6354
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,2048,1,0,6.2332
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,4096,1,0,16.3997
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,8192,1,0,20.5114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,16384,1,0,52.0866
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,0,0.5347
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,4,1,0,0.5196
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,8,1,0,0.5206
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,16,1,0,0.5343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,32,1,0,0.5547
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,64,1,0,0.6247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,128,1,0,0.7975
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,256,1,0,1.2398
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,512,1,0,2.4385
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1024,1,0,5.4349
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,2048,1,0,12.4424
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,4096,1,0,19.5192
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,0,0.5231
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,4,1,0,0.5231
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,8,1,0,0.5391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,16,1,0,0.5613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,32,1,0,0.6381
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,64,1,0,0.7967
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,128,1,0,1.2218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,256,1,0,2.3678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,512,1,0,4.8648
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1024,1,0,10.7936
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,2048,1,0,18.3082
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,4096,1,0,40.2824
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,0,0.5144
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,4,1,0,0.5233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,8,1,0,0.5528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,16,1,0,0.6136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,32,1,0,0.7941
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,64,1,0,1.2274
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,128,1,0,2.3139
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,256,1,0,4.7275
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,512,1,0,9.9545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1024,1,0,17.6087
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,2048,1,0,35.3828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,0,0.5175
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,4,1,0,0.5589
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,8,1,0,0.6262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,16,1,0,0.8033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,32,1,0,1.2332
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,64,1,0,2.3236
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,128,1,0,4.6229
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,256,1,0,9.6138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,512,1,0,17.1114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1024,1,0,34.7150
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,0,0.5416
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,4,1,0,0.6164
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,8,1,0,0.7942
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,16,1,0,1.2322
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,32,1,0,2.3488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,64,1,0,4.6803
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,128,1,0,9.0905
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,256,1,0,17.0739
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,512,1,0,35.1305
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,0,0.5764
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,4,1,0,0.8083
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,8,1,0,1.2353
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,16,1,0,2.3362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,32,1,0,4.5478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,64,1,0,9.4235
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,128,1,0,16.9348
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,256,1,0,35.1148
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,0,0.6147
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,4,1,0,1.2356
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,8,1,0,2.3443
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,16,1,0,4.6211
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,32,1,0,9.5374
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,64,1,0,16.9993
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,128,1,0,35.3680
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,0,0.4922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,4,1,0,0.5307
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,8,1,0,0.5179
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,16,1,0,0.5189
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,32,1,0,0.5182
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,64,1,0,0.5125
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,128,1,0,0.5148
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,256,1,0,0.5231
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,512,1,0,0.5390
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1024,1,0,0.5975
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,2048,1,0,0.8044
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,4096,1,0,1.9758
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,8192,1,0,3.4204
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,16384,1,0,7.1691
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,0,0.5210
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,4,1,0,0.5155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,8,1,0,0.5075
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,16,1,0,0.5145
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,32,1,0,0.5082
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,64,1,0,0.5047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,128,1,0,0.5150
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,256,1,0,0.5306
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,512,1,0,0.5847
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1024,1,0,0.7104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,2048,1,0,1.1690
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,4096,1,0,3.0735
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,8192,1,0,6.4525
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,16384,1,0,14.4477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,0,0.5127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,4,1,0,0.5146
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,8,1,0,0.5147
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,16,1,0,0.5104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,32,1,0,0.5093
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,64,1,0,0.5132
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,128,1,0,0.5294
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,256,1,0,0.5611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,512,1,0,0.6799
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1024,1,0,0.9941
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,2048,1,0,2.0143
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,4096,1,0,5.7824
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,8192,1,0,12.9514
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,16384,1,0,16.1853
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,0,0.5112
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,4,1,0,0.5184
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,8,1,0,0.5101
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,16,1,0,0.5116
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,32,1,0,0.5122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,64,1,0,0.5416
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,128,1,0,0.5646
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,256,1,0,0.6593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,512,1,0,0.9167
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1024,1,0,1.6553
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,2048,1,0,3.9142
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,4096,1,0,11.4950
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,8192,1,0,13.6582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,16384,1,0,36.8915
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,0,0.5108
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,4,1,0,0.5096
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,8,1,0,0.5095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,16,1,0,0.5195
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,32,1,0,0.5328
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,64,1,0,0.5647
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,128,1,0,0.6414
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,256,1,0,0.8789
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,512,1,0,1.5164
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1024,1,0,3.2144
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,2048,1,0,7.8762
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,4096,1,0,12.0703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,0,0.5180
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,4,1,0,0.5101
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,8,1,0,0.5149
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,16,1,0,0.5362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,32,1,0,0.5571
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,64,1,0,0.6343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,128,1,0,0.8554
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,256,1,0,1.4026
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,512,1,0,2.8992
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1024,1,0,6.4618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,2048,1,0,8.6746
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,4096,1,0,27.8889
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,0,0.5060
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,4,1,0,0.5198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,8,1,0,0.5255
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,16,1,0,0.5786
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,32,1,0,0.6315
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,64,1,0,0.8440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,128,1,0,1.3375
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,256,1,0,2.6680
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,512,1,0,5.8807
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1024,1,0,8.5481
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,2048,1,0,17.5634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,0,0.5100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,4,1,0,0.5360
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,8,1,0,0.5630
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,16,1,0,0.6407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,32,1,0,0.8408
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,64,1,0,1.3419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,128,1,0,2.5515
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,256,1,0,5.3424
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,512,1,0,8.7672
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1024,1,0,17.7380
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,0,0.5281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,4,1,0,0.5768
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,8,1,0,0.6439
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,16,1,0,0.8558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,32,1,0,1.3451
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,64,1,0,2.5524
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,128,1,0,5.1299
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,256,1,0,8.4592
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,512,1,0,18.0183
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,0,0.5378
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,4,1,0,0.6384
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,8,1,0,0.8551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,16,1,0,1.3478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,32,1,0,2.5609
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,64,1,0,5.1589
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,128,1,0,8.9606
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,256,1,0,17.8932
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,0,0.5646
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,4,1,0,0.8484
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,8,1,0,1.3519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,16,1,0,2.5760
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,32,1,0,5.1767
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,64,1,0,8.9667
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,128,1,0,17.7410
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,0,0.4876
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,4,1,0,0.5130
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,8,1,0,0.5116
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,16,1,0,0.5176
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,32,1,0,0.5122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,64,1,0,0.5097
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,128,1,0,0.5041
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,256,1,0,0.5068
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,512,1,0,0.5067
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1024,1,0,0.5433
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,2048,1,0,0.7220
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,4096,1,0,1.7927
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,8192,1,0,2.9417
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,16384,1,0,5.9841
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,0,0.5014
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,4,1,0,0.4973
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,8,1,0,0.5012
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,16,1,0,0.4995
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,32,1,0,0.5024
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,64,1,0,0.5005
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,128,1,0,0.4997
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,256,1,0,0.4928
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,512,1,0,0.5313
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1024,1,0,0.6233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,2048,1,0,0.9891
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,4096,1,0,2.6230
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,8192,1,0,5.2963
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,16384,1,0,13.8296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,0,0.5129
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,4,1,0,0.5032
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,8,1,0,0.5017
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,16,1,0,0.4995
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,32,1,0,0.5080
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,64,1,0,0.5089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,128,1,0,0.5047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,256,1,0,0.5252
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,512,1,0,0.5902
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1024,1,0,0.8156
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,2048,1,0,1.5206
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,4096,1,0,4.7061
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,8192,1,0,13.2399
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,16384,1,0,12.4507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,0,0.4976
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,4,1,0,0.5118
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,8,1,0,0.5108
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,16,1,0,0.5049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,32,1,0,0.5073
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,64,1,0,0.5097
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,128,1,0,0.5266
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,256,1,0,0.5751
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,512,1,0,0.7387
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1024,1,0,1.1743
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,2048,1,0,2.8035
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,4096,1,0,9.1169
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,8192,1,0,10.1870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,16384,1,0,30.0332
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,0,0.5049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,4,1,0,0.4998
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,8,1,0,0.5072
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,16,1,0,0.5069
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,32,1,0,0.5008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,64,1,0,0.5239
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,128,1,0,0.5500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,256,1,0,0.6953
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,512,1,0,1.0365
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1024,1,0,2.1399
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,2048,1,0,5.5030
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,4096,1,0,8.7487
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,0,0.5077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,4,1,0,0.5063
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,8,1,0,0.5025
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,16,1,0,0.5087
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,32,1,0,0.5287
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,64,1,0,0.5524
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,128,1,0,0.6587
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,256,1,0,0.9427
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,512,1,0,1.8413
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1024,1,0,4.1749
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,2048,1,0,4.1232
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,4096,1,0,20.0321
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,0,0.5023
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,4,1,0,0.5040
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,8,1,0,0.5042
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,16,1,0,0.5266
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,32,1,0,0.5481
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,64,1,0,0.6671
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,128,1,0,0.8768
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,256,1,0,1.6604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,512,1,0,3.5937
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1024,1,0,4.1486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,2048,1,0,8.0562
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,0,0.5059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,4,1,0,0.5071
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,8,1,0,0.5284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,16,1,0,0.5509
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,32,1,0,0.6686
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,64,1,0,0.8796
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,128,1,0,1.5269
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,256,1,0,3.2091
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,512,1,0,4.1207
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1024,1,0,8.0303
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,0,0.5052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,4,1,0,0.5199
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,8,1,0,0.5508
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,16,1,0,0.6633
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,32,1,0,0.8817
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,64,1,0,1.5270
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,128,1,0,2.9407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,256,1,0,4.1178
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,512,1,0,8.0488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,0,0.5166
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,4,1,0,0.5570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,8,1,0,0.6710
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,16,1,0,0.8870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,32,1,0,1.5331
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,64,1,0,2.9504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,128,1,0,4.1275
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,256,1,0,8.0448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,0,0.5262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,4,1,0,0.6710
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,8,1,0,0.8824
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,16,1,0,1.5354
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,32,1,0,2.9583
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,64,1,0,4.1207
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,128,1,0,8.0246
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,0,0.4815
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,4,1,0,0.5131
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,8,1,0,0.5127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,16,1,0,0.5035
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,32,1,0,0.5002
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,64,1,0,0.5062
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,128,1,0,0.5002
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,256,1,0,0.5031
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,512,1,0,0.5035
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1024,1,0,0.5149
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,2048,1,0,0.6797
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,4096,1,0,1.6913
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,8192,1,0,2.7313
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,16384,1,0,5.4826
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,0,0.5114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,4,1,0,0.5102
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,8,1,0,0.5056
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,16,1,0,0.5097
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,32,1,0,0.5141
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,64,1,0,0.5071
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,128,1,0,0.5108
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,256,1,0,0.5058
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,512,1,0,0.5132
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1024,1,0,0.5912
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,2048,1,0,0.9091
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,4096,1,0,2.4510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,8192,1,0,4.8016
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,16384,1,0,10.8033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,0,0.5083
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,4,1,0,0.5134
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,8,1,0,0.5106
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,16,1,0,0.5089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,32,1,0,0.5057
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,64,1,0,0.5122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,128,1,0,0.5091
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,256,1,0,0.5164
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,512,1,0,0.5570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1024,1,0,0.7421
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,2048,1,0,1.3692
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,4096,1,0,4.1422
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,8192,1,0,9.4397
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,16384,1,0,10.7616
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,0,0.5042
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,4,1,0,0.5157
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,8,1,0,0.5076
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,16,1,0,0.5160
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,32,1,0,0.5128
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,64,1,0,0.5166
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,128,1,0,0.5228
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,256,1,0,0.5337
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,512,1,0,0.6548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1024,1,0,1.0299
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,2048,1,0,2.3429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,4096,1,0,8.1199
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,8192,1,0,8.6285
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,16384,1,0,26.9674
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,0,0.4982
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,4,1,0,0.5094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,8,1,0,0.5107
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,16,1,0,0.5076
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,32,1,0,0.5123
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,64,1,0,0.5130
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,128,1,0,0.5080
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,256,1,0,0.6198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,512,1,0,0.8747
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1024,1,0,1.6958
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,2048,1,0,4.5843
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,4096,1,0,7.2126
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,0,0.5194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,4,1,0,0.5184
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,8,1,0,0.5122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,16,1,0,0.5138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,32,1,0,0.5090
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,64,1,0,0.5198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,128,1,0,0.5893
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,256,1,0,0.7946
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,512,1,0,1.3849
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1024,1,0,3.2814
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,2048,1,0,2.8326
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,4096,1,0,16.8866
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,0,0.5056
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,4,1,0,0.5079
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,8,1,0,0.5145
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,16,1,0,0.5120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,32,1,0,0.5171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,64,1,0,0.5836
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,128,1,0,0.7176
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,256,1,0,1.1917
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,512,1,0,2.6648
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1024,1,0,2.8227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,2048,1,0,5.5335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,0,0.5053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,4,1,0,0.5127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,8,1,0,0.5151
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,16,1,0,0.5153
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,32,1,0,0.5836
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,64,1,0,0.7190
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,128,1,0,1.0419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,256,1,0,2.2607
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,512,1,0,2.8202
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1024,1,0,5.5355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,0,0.5081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,4,1,0,0.5055
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,8,1,0,0.5088
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,16,1,0,0.5802
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,32,1,0,0.7129
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,64,1,0,1.0420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,128,1,0,1.9629
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,256,1,0,2.8197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,512,1,0,5.5393
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,0,0.5097
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,4,1,0,0.5057
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,8,1,0,0.5764
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,16,1,0,0.7114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,32,1,0,1.0435
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,64,1,0,1.9647
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,128,1,0,2.5934
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,256,1,0,4.8725
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,0,0.5021
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,4,1,0,0.5816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,8,1,0,0.7110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,16,1,0,1.0446
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,32,1,0,1.9676
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,64,1,0,2.5911
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,128,1,0,4.8777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,0,0.8408
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,4,1,0,0.8583
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,8,1,0,0.8339
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,16,1,0,0.8214
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,32,1,0,0.8194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,64,1,0,0.8281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,128,1,0,0.8155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,256,1,0,0.8232
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,512,1,0,0.9707
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1024,1,0,1.2866
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,2048,1,0,1.9072
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,4096,1,0,3.8498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,8192,1,0,6.8212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,16384,1,0,13.6454
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,0,0.8571
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,4,1,0,0.8351
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,8,1,0,0.8317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,16,1,0,0.8291
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,32,1,0,0.8343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,64,1,0,0.8324
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,128,1,0,0.8267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,256,1,0,0.9840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,512,1,0,1.3139
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1024,1,0,1.9463
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,2048,1,0,3.1546
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,4096,1,0,6.6112
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,8192,1,0,13.0875
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,16384,1,0,27.9991
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,0,0.8469
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,4,1,0,0.8222
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,8,1,0,0.8249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,16,1,0,0.8247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,32,1,0,0.8221
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,64,1,0,0.8276
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,128,1,0,0.9870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,256,1,0,1.3545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,512,1,0,1.9989
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1024,1,0,3.2490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,2048,1,0,5.7976
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,4096,1,0,13.7458
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,8192,1,0,26.2334
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,16384,1,0,55.9211
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,0,0.8339
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,4,1,0,0.8510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,8,1,0,0.8179
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,16,1,0,0.8255
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,32,1,0,0.8162
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,64,1,0,0.9716
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,128,1,0,1.3281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,256,1,0,2.0149
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,512,1,0,3.3334
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1024,1,0,5.9873
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,2048,1,0,11.3440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,4096,1,0,25.9418
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,8192,1,0,53.5427
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,0,0.7978
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,4,1,0,0.8092
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,8,1,0,0.8047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,16,1,0,0.7959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,32,1,0,0.8112
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,64,1,0,0.8127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,128,1,0,0.8030
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,256,1,0,0.8064
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,512,1,0,0.8869
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1024,1,0,1.1362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,2048,1,0,1.6544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,4096,1,0,3.3791
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,8192,1,0,5.9491
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,16384,1,0,11.9237
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,0,0.8112
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,4,1,0,0.8157
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,8,1,0,0.7974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,16,1,0,0.8083
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,32,1,0,0.8091
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,64,1,0,0.8090
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,128,1,0,0.8067
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,256,1,0,0.8786
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,512,1,0,1.1310
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1024,1,0,1.6533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,2048,1,0,2.6513
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,4096,1,0,5.6627
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,8192,1,0,11.3000
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,16384,1,0,24.6128
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,0,0.8079
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,4,1,0,0.8122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,8,1,0,0.8077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,16,1,0,0.8105
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,32,1,0,0.8071
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,64,1,0,0.8060
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,128,1,0,0.8789
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,256,1,0,1.1395
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,512,1,0,1.6391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1024,1,0,2.6241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,2048,1,0,4.6474
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,4096,1,0,10.6578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,8192,1,0,23.0228
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,16384,1,0,57.4682
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,0,0.7924
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,4,1,0,0.8272
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,8,1,0,0.8070
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,16,1,0,0.8007
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,32,1,0,0.8015
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,64,1,0,0.8742
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,128,1,0,1.1208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,256,1,0,1.6278
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,512,1,0,2.6070
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1024,1,0,4.5966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,2048,1,0,9.1492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,4096,1,0,21.2046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,8192,1,0,46.8362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,16384,1,0,102.6512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,0,0.8067
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,4,1,0,0.8059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,8,1,0,0.8057
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,16,1,0,0.8059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,32,1,0,0.8727
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,64,1,0,1.1182
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,128,1,0,1.6194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,256,1,0,2.5930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,512,1,0,4.5663
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1024,1,0,8.8744
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,2048,1,0,19.1896
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,4096,1,0,43.7871
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,0,0.8042
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,4,1,0,0.8060
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,8,1,0,0.8093
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,16,1,0,0.8814
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,32,1,0,1.1328
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,64,1,0,1.6176
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,128,1,0,2.5847
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,256,1,0,4.5685
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,512,1,0,8.6661
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1024,1,0,17.8102
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,2048,1,0,38.0953
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,4096,1,0,90.9296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,0,0.8045
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,4,1,0,0.8060
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,8,1,0,0.8834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,16,1,0,1.1303
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,32,1,0,1.6293
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,64,1,0,2.5766
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,128,1,0,4.5465
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,256,1,0,8.6337
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,512,1,0,17.2453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1024,1,0,36.5592
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,2048,1,0,76.0978
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,0,0.7985
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,4,1,0,0.8782
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,8,1,0,1.1338
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,16,1,0,1.6092
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,32,1,0,2.5748
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,64,1,0,4.5444
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,128,1,0,8.6053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,256,1,0,16.9664
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,512,1,0,35.1578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1024,1,0,73.1651
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,0,0.8010
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,4,1,0,1.1284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,8,1,0,1.6244
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,16,1,0,2.5730
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,32,1,0,4.5346
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,64,1,0,8.6041
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,128,1,0,16.9230
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,256,1,0,33.9820
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,512,1,0,70.9733
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,0,0.8683
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,4,1,0,1.6150
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,8,1,0,2.5676
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,16,1,0,4.5377
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,32,1,0,8.5983
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,64,1,0,16.9246
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,128,1,0,33.6461
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,256,1,0,68.9206
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,0,1.1260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,4,1,0,2.5791
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,8,1,0,4.5431
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,16,1,0,8.5974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,32,1,0,16.9171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,64,1,0,33.7247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,128,1,0,68.2894
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,0,0.8418
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,4,1,0,0.9843
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,8,1,0,0.8624
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,16,1,0,0.8499
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,32,1,0,0.8541
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,64,1,0,0.8493
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,128,1,0,0.8447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,256,1,0,0.8477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,512,1,0,0.8775
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1024,1,0,1.1453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,2048,1,0,1.5921
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,4096,1,0,3.2500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,8192,1,0,5.5486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,16384,1,0,11.0316
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,0,0.8536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,4,1,0,0.8488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,8,1,0,0.8500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,16,1,0,0.8569
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,32,1,0,0.8473
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,64,1,0,0.8536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,128,1,0,0.8449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,256,1,0,0.8844
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,512,1,0,1.1226
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1024,1,0,1.5801
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,2048,1,0,2.4927
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,4096,1,0,5.2559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,8192,1,0,10.0521
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,16384,1,0,21.8944
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,0,0.8498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,4,1,0,0.8444
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,8,1,0,0.8464
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,16,1,0,0.8546
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,32,1,0,0.8477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,64,1,0,0.8546
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,128,1,0,0.8816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,256,1,0,1.1208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,512,1,0,1.5705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1024,1,0,2.4698
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,2048,1,0,4.2858
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,4096,1,0,9.8336
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,8192,1,0,20.1378
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,16384,1,0,44.9918
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,0,0.8549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,4,1,0,0.8800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,8,1,0,0.8551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,16,1,0,0.8478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,32,1,0,0.8554
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,64,1,0,0.8991
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,128,1,0,1.1318
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,256,1,0,1.5660
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,512,1,0,2.4558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1024,1,0,4.2201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,2048,1,0,8.1990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,4096,1,0,33.7069
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,8192,1,0,42.3721
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,16384,1,0,93.8081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,0,0.8370
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,4,1,0,0.8482
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,8,1,0,0.8469
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,16,1,0,0.8428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,32,1,0,0.8853
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,64,1,0,1.1145
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,128,1,0,1.5624
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,256,1,0,2.4396
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,512,1,0,4.1962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1024,1,0,7.7952
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,2048,1,0,16.6443
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,4096,1,0,39.4112
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,0,0.8460
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,4,1,0,0.8419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,8,1,0,0.8412
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,16,1,0,0.8788
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,32,1,0,1.1136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,64,1,0,1.5472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,128,1,0,2.4296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,256,1,0,4.1826
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,512,1,0,7.7690
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1024,1,0,15.7401
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,2048,1,0,34.0777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,4096,1,0,82.1260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,0,0.8382
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,4,1,0,0.8422
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,8,1,0,0.8823
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,16,1,0,1.1097
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,32,1,0,1.5550
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,64,1,0,2.4245
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,128,1,0,4.1717
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,256,1,0,7.7504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,512,1,0,15.2035
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1024,1,0,31.3508
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,2048,1,0,67.8446
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,0,0.8409
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,4,1,0,0.8887
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,8,1,0,1.1234
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,16,1,0,1.5527
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,32,1,0,2.4273
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,64,1,0,4.1699
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,128,1,0,7.7273
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,256,1,0,15.1079
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,512,1,0,30.2254
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1024,1,0,64.0597
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,0,0.8434
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,4,1,0,1.1174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,8,1,0,1.5551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,16,1,0,2.4385
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,32,1,0,4.1688
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,64,1,0,7.7268
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,128,1,0,15.0684
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,256,1,0,29.9731
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,512,1,0,61.1915
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,0,0.8860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,4,1,0,1.5635
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,8,1,0,2.4267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,16,1,0,4.1659
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,32,1,0,7.7204
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,64,1,0,15.0650
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,128,1,0,29.8828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,256,1,0,59.7217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,0,1.1175
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,4,1,0,2.4343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,8,1,0,4.1819
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,16,1,0,7.7217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,32,1,0,15.0506
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,64,1,0,29.8764
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,128,1,0,59.5438
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,0,0.8288
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,4,1,0,0.8421
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,8,1,0,0.8452
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,16,1,0,0.8378
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,32,1,0,0.8306
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,64,1,0,0.8262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,128,1,0,0.8213
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,256,1,0,0.8250
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,512,1,0,0.8538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1024,1,0,1.0865
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,2048,1,0,1.5377
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,4096,1,0,3.1180
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,8192,1,0,5.2818
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,16384,1,0,10.4121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,0,0.8316
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,4,1,0,0.8281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,8,1,0,0.8316
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,16,1,0,0.8251
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,32,1,0,0.8270
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,64,1,0,0.8248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,128,1,0,0.8238
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,256,1,0,0.8572
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,512,1,0,1.0787
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1024,1,0,1.5197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,2048,1,0,2.3840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,4096,1,0,5.0133
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,8192,1,0,9.9552
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,16384,1,0,19.9704
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,0,0.8264
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,4,1,0,0.8332
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,8,1,0,0.8327
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,16,1,0,0.8329
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,32,1,0,0.8299
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,64,1,0,0.8357
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,128,1,0,0.8625
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,256,1,0,1.0844
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,512,1,0,1.5169
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1024,1,0,2.3644
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,2048,1,0,4.0962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,4096,1,0,9.2941
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,8192,1,0,19.4106
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,16384,1,0,42.7399
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,0,0.8336
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,4,1,0,0.8623
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,8,1,0,0.8312
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,16,1,0,0.8384
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,32,1,0,0.8229
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,64,1,0,0.8656
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,128,1,0,1.0882
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,256,1,0,1.5127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,512,1,0,2.3490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1024,1,0,4.0255
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,2048,1,0,7.7578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,4096,1,0,18.3964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,8192,1,0,40.1386
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,16384,1,0,89.2411
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,0,0.8328
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,4,1,0,0.8330
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,8,1,0,0.8309
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,16,1,0,0.8267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,32,1,0,0.8615
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,64,1,0,1.0978
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,128,1,0,1.5069
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,256,1,0,2.3361
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,512,1,0,3.9884
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1024,1,0,7.4149
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,2048,1,0,15.3914
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,4096,1,0,37.6802
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,0,0.8231
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,4,1,0,0.8174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,8,1,0,0.8311
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,16,1,0,0.8935
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,32,1,0,1.0812
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,64,1,0,1.5100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,128,1,0,2.3222
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,256,1,0,3.9945
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,512,1,0,7.3505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1024,1,0,14.6804
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,2048,1,0,31.6590
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,4096,1,0,78.1051
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,0,0.8225
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,4,1,0,0.8222
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,8,1,0,0.8600
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,16,1,0,1.0709
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,32,1,0,1.4993
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,64,1,0,2.3395
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,128,1,0,3.9745
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,256,1,0,7.3335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,512,1,0,14.2264
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1024,1,0,30.0867
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,2048,1,0,63.6582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,0,0.8270
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,4,1,0,0.8726
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,8,1,0,1.0840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,16,1,0,1.4989
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,32,1,0,2.3183
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,64,1,0,3.9781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,128,1,0,7.3106
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,256,1,0,14.2124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,512,1,0,28.2784
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1024,1,0,60.3299
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,0,0.8298
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,4,1,0,1.0862
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,8,1,0,1.5039
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,16,1,0,2.3212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,32,1,0,3.9790
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,64,1,0,7.3110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,128,1,0,14.1667
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,256,1,0,28.1739
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,512,1,0,57.6754
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,0,0.8623
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,4,1,0,1.5055
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,8,1,0,2.3218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,16,1,0,3.9734
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,32,1,0,7.3247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,64,1,0,14.1627
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,128,1,0,28.0828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,256,1,0,56.1912
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,0,1.0875
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,4,1,0,2.3240
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,8,1,0,3.9919
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,16,1,0,7.3098
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,32,1,0,14.1544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,64,1,0,28.0719
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,128,1,0,55.9260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,0,0.7946
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,4,1,0,0.8357
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,8,1,0,0.8173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,16,1,0,0.8201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,32,1,0,0.8202
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,64,1,0,0.8135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,128,1,0,0.8070
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,256,1,0,0.8190
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,512,1,0,0.8152
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1024,1,0,0.8477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,2048,1,0,1.0902
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,4096,1,0,2.4081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,8192,1,0,4.1135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,16384,1,0,8.0747
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,0,0.8202
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,4,1,0,0.7953
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,8,1,0,0.7991
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,16,1,0,0.7995
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,32,1,0,0.7951
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,64,1,0,0.7996
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,128,1,0,0.7956
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,256,1,0,0.8033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,512,1,0,0.8262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1024,1,0,0.9694
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,2048,1,0,1.5222
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,4096,1,0,3.7561
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,8192,1,0,7.4016
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,16384,1,0,15.9982
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,0,0.8057
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,4,1,0,0.7856
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,8,1,0,0.7836
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,16,1,0,0.7916
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,32,1,0,0.7919
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,64,1,0,0.7927
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,128,1,0,0.7992
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,256,1,0,0.8317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,512,1,0,0.9264
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1024,1,0,1.3187
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,2048,1,0,2.5490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,4096,1,0,6.6987
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,8192,1,0,14.5936
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,16384,1,0,18.6060
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,0,0.7907
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,4,1,0,0.8043
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,8,1,0,0.7977
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,16,1,0,0.7857
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,32,1,0,0.7966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,64,1,0,0.8056
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,128,1,0,0.8374
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,256,1,0,0.9094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,512,1,0,1.2123
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1024,1,0,2.1465
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,2048,1,0,4.8174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,4096,1,0,13.6086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,8192,1,0,18.0412
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,0,0.7968
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,4,1,0,0.8122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,8,1,0,0.8007
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,16,1,0,0.8073
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,32,1,0,0.8143
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,64,1,0,0.8455
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,128,1,0,0.9073
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,256,1,0,1.1709
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,512,1,0,1.9450
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1024,1,0,3.9990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,2048,1,0,9.5512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,4096,1,0,15.2520
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,0,0.8056
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,4,1,0,0.8010
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,8,1,0,0.8058
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,16,1,0,0.8150
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,32,1,0,0.8392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,64,1,0,0.9075
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,128,1,0,1.1549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,256,1,0,1.8505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,512,1,0,3.6001
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1024,1,0,7.9424
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,2048,1,0,10.6831
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,0,0.7976
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,4,1,0,0.8045
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,8,1,0,0.8100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,16,1,0,0.8413
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,32,1,0,0.8964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,64,1,0,1.1457
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,128,1,0,1.7981
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,256,1,0,3.3837
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,512,1,0,7.0844
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1024,1,0,10.7567
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,0,0.7988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,4,1,0,0.8080
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,8,1,0,0.8315
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,16,1,0,0.9022
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,32,1,0,1.1452
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,64,1,0,1.7912
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,128,1,0,3.2853
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,256,1,0,6.7078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,512,1,0,10.8362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,0,0.8010
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,4,1,0,0.8355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,8,1,0,0.8997
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,16,1,0,1.1458
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,32,1,0,1.7894
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,64,1,0,3.2856
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,128,1,0,6.5054
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,256,1,0,10.6754
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,0,0.8167
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,4,1,0,0.9079
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,8,1,0,1.1538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,16,1,0,1.7983
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,32,1,0,3.2883
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,64,1,0,6.3906
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,128,1,0,10.7332
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,0,0.8428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,4,1,0,1.1533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,8,1,0,1.8043
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,16,1,0,3.2885
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,32,1,0,6.4214
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,64,1,0,10.8088
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,0,0.7610
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,4,1,0,0.7953
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,8,1,0,0.8078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,16,1,0,0.7976
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,32,1,0,0.7950
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,64,1,0,0.7988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,128,1,0,0.7965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,256,1,0,0.7980
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,512,1,0,0.7966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1024,1,0,0.7984
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,2048,1,0,0.9547
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,4096,1,0,2.0652
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,8192,1,0,3.4697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,16384,1,0,6.7102
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,0,0.7831
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,4,1,0,0.7776
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,8,1,0,0.7779
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,16,1,0,0.7792
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,32,1,0,0.7805
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,64,1,0,0.7763
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,128,1,0,0.7820
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,256,1,0,0.7762
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,512,1,0,0.7871
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1024,1,0,0.8537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,2048,1,0,1.2411
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,4096,1,0,3.1032
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,8192,1,0,6.0200
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,16384,1,0,13.2467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,0,0.7843
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,4,1,0,0.7810
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,8,1,0,0.7927
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,16,1,0,0.7867
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,32,1,0,0.7890
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,64,1,0,0.7768
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,128,1,0,0.7885
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,256,1,0,0.7914
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,512,1,0,0.8332
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1024,1,0,1.0711
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,2048,1,0,1.9201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,4096,1,0,5.3122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,8192,1,0,11.8338
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,16384,1,0,13.7324
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,0,0.7899
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,4,1,0,0.7939
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,8,1,0,0.7911
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,16,1,0,0.7829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,32,1,0,0.7894
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,64,1,0,0.7896
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,128,1,0,0.7952
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,256,1,0,0.8281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,512,1,0,0.9882
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1024,1,0,1.5736
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,2048,1,0,3.4266
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,4096,1,0,10.3866
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,8192,1,0,11.6965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,16384,1,0,31.9041
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,0,0.7985
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,4,1,0,0.7971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,8,1,0,0.7896
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,16,1,0,0.7928
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,32,1,0,0.7913
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,64,1,0,0.7950
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,128,1,0,0.8232
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,256,1,0,0.9484
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,512,1,0,1.4159
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1024,1,0,2.6964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,2048,1,0,6.6777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,4096,1,0,10.1138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,0,0.7861
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,4,1,0,0.7875
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,8,1,0,0.7774
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,16,1,0,0.7888
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,32,1,0,0.8010
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,64,1,0,0.8318
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,128,1,0,0.9094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,256,1,0,1.3055
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,512,1,0,2.4156
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1024,1,0,5.2097
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,2048,1,0,5.5699
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,4096,1,0,22.9343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,0,0.7793
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,4,1,0,0.7805
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,8,1,0,0.7964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,16,1,0,0.7982
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,32,1,0,0.8285
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,64,1,0,0.9033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,128,1,0,1.2387
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,256,1,0,2.1672
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,512,1,0,4.6103
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1024,1,0,5.5146
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,2048,1,0,11.3091
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,0,0.7827
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,4,1,0,0.7871
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,8,1,0,0.7991
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,16,1,0,0.8305
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,32,1,0,0.9055
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,64,1,0,1.2365
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,128,1,0,2.0261
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,256,1,0,4.0860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,512,1,0,5.5858
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1024,1,0,11.3393
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,0,0.7917
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,4,1,0,0.7961
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,8,1,0,0.8254
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,16,1,0,0.9098
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,32,1,0,1.2341
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,64,1,0,2.0292
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,128,1,0,3.8211
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,256,1,0,5.4666
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,512,1,0,11.4184
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,0,0.7903
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,4,1,0,0.8298
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,8,1,0,0.9105
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,16,1,0,1.2396
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,32,1,0,2.0280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,64,1,0,3.8223
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,128,1,0,5.5774
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,256,1,0,11.4299
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,0,0.8034
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,4,1,0,0.9135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,8,1,0,1.2354
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,16,1,0,2.0279
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,32,1,0,3.8218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,64,1,0,5.5075
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,128,1,0,11.3493
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,0,0.7822
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,4,1,0,0.8198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,8,1,0,0.8100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,16,1,0,0.8009
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,32,1,0,0.8114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,64,1,0,0.8133
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,128,1,0,0.8077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,256,1,0,0.8095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,512,1,0,0.8104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1024,1,0,0.8064
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,2048,1,0,0.9024
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,4096,1,0,1.9715
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,8192,1,0,3.1736
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,16384,1,0,8.4100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,0,0.8613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,4,1,0,0.8341
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,8,1,0,0.8018
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,16,1,0,0.8033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,32,1,0,0.8107
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,64,1,0,0.8021
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,128,1,0,0.7965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,256,1,0,0.7984
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,512,1,0,0.8039
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1024,1,0,0.8158
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,2048,1,0,1.1380
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,4096,1,0,6.3648
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,8192,1,0,5.9207
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,16384,1,0,11.4560
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,0,0.7984
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,4,1,0,0.7876
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,8,1,0,0.7956
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,16,1,0,0.7951
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,32,1,0,0.7880
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,64,1,0,0.7934
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,128,1,0,0.7822
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,256,1,0,0.7973
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,512,1,0,0.7988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1024,1,0,0.9590
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,2048,1,0,1.6180
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,4096,1,0,4.6740
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,8192,1,0,10.1163
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,16384,1,0,11.6500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,0,0.7955
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,4,1,0,0.7933
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,8,1,0,0.7896
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,16,1,0,0.7866
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,32,1,0,0.7818
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,64,1,0,0.7927
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,128,1,0,0.7965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,256,1,0,0.7998
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,512,1,0,0.8710
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1024,1,0,1.2928
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,2048,1,0,2.6840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,4096,1,0,8.7070
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,8192,1,0,9.6094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,16384,1,0,27.7649
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,0,0.8018
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,4,1,0,0.7936
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,8,1,0,0.7914
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,16,1,0,0.7851
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,32,1,0,0.7870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,64,1,0,0.7976
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,128,1,0,0.7960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,256,1,0,0.8464
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,512,1,0,1.1268
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1024,1,0,2.0194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,2048,1,0,5.0792
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,4096,1,0,8.0298
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,0,0.7998
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,4,1,0,0.7866
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,8,1,0,0.7958
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,16,1,0,0.7933
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,32,1,0,0.7953
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,64,1,0,0.7998
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,128,1,0,0.8302
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,256,1,0,1.0503
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,512,1,0,1.7133
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1024,1,0,3.7354
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,2048,1,0,3.3343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,4096,1,0,18.4727
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,0,0.7918
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,4,1,0,0.7985
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,8,1,0,0.7880
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,16,1,0,0.8035
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,32,1,0,0.8002
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,64,1,0,0.8318
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,128,1,0,0.9749
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,256,1,0,1.5368
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,512,1,0,3.1487
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1024,1,0,3.3259
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,2048,1,0,6.5524
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,0,0.7850
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,4,1,0,0.7979
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,8,1,0,0.7954
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,16,1,0,0.7998
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,32,1,0,0.8241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,64,1,0,0.9762
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,128,1,0,1.3917
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,256,1,0,2.7575
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,512,1,0,3.3308
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1024,1,0,6.6886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,0,0.8011
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,4,1,0,0.8025
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,8,1,0,0.8021
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,16,1,0,0.8272
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,32,1,0,0.9828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,64,1,0,1.3919
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,128,1,0,2.4734
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,256,1,0,3.3312
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,512,1,0,6.7172
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,0,0.8022
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,4,1,0,0.7937
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,8,1,0,0.8190
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,16,1,0,0.9838
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,32,1,0,1.3913
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,64,1,0,2.4752
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,128,1,0,3.3218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,256,1,0,6.7692
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,0,0.8091
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,4,1,0,0.8351
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,8,1,0,0.9905
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,16,1,0,1.4018
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,32,1,0,2.4728
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,64,1,0,3.3338
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,128,1,0,6.6330
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,0,0.7650
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,4,1,0,0.7962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,8,1,0,0.7998
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,16,1,0,0.7966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,32,1,0,0.7899
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,64,1,0,0.7896
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,128,1,0,0.7890
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,256,1,0,0.7934
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,512,1,0,0.7924
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1024,1,0,0.7925
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,2048,1,0,0.8668
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,4096,1,0,1.9093
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,8192,1,0,2.9867
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,16384,1,0,5.6691
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,0,0.7855
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,4,1,0,0.7800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,8,1,0,0.7778
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,16,1,0,0.7779
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,32,1,0,0.7813
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,64,1,0,0.7779
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,128,1,0,0.7763
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,256,1,0,0.7991
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,512,1,0,0.7945
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1024,1,0,0.7966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,2048,1,0,1.0758
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,4096,1,0,2.6214
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,8192,1,0,5.2158
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,16384,1,0,10.7661
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,0,0.7914
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,4,1,0,0.7846
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,8,1,0,0.7804
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,16,1,0,0.7827
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,32,1,0,0.7916
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,64,1,0,0.7965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,128,1,0,0.7896
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,256,1,0,0.7879
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,512,1,0,0.7859
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1024,1,0,0.9103
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,2048,1,0,1.5606
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,4096,1,0,4.2168
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,8192,1,0,9.4411
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,16384,1,0,10.6116
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,0,0.7888
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,4,1,0,0.8014
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,8,1,0,0.7914
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,16,1,0,0.7990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,32,1,0,0.7942
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,64,1,0,0.7987
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,128,1,0,0.7915
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,256,1,0,0.7922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,512,1,0,0.8340
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1024,1,0,1.1793
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,2048,1,0,2.4008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,4096,1,0,8.0340
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,8192,1,0,8.5732
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,16384,1,0,25.2308
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,0,0.7908
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,4,1,0,0.7935
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,8,1,0,0.7989
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,16,1,0,0.7888
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,32,1,0,0.7913
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,64,1,0,0.7883
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,128,1,0,0.7946
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,256,1,0,0.8098
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,512,1,0,1.0282
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1024,1,0,1.7545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,2048,1,0,4.4962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,4096,1,0,6.9897
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,0,0.7884
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,4,1,0,0.7849
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,8,1,0,0.7849
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,16,1,0,0.7862
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,32,1,0,0.7908
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,64,1,0,0.7922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,128,1,0,0.7870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,256,1,0,0.9411
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,512,1,0,1.4381
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1024,1,0,3.1538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,2048,1,0,2.2928
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,4096,1,0,16.4401
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,0,0.8023
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,4,1,0,0.8048
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,8,1,0,0.7915
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,16,1,0,0.7960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,32,1,0,0.7941
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,64,1,0,0.7983
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,128,1,0,0.8639
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,256,1,0,1.2519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,512,1,0,2.5209
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1024,1,0,2.2858
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,2048,1,0,4.4300
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,0,0.7912
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,4,1,0,0.7888
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,8,1,0,0.7963
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,16,1,0,0.8025
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,32,1,0,0.7906
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,64,1,0,0.8659
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,128,1,0,1.1038
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,256,1,0,2.1268
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,512,1,0,2.2824
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1024,1,0,4.4149
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,0,0.7850
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,4,1,0,0.7865
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,8,1,0,0.7812
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,16,1,0,0.7995
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,32,1,0,0.8603
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,64,1,0,1.0947
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,128,1,0,1.8232
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,256,1,0,2.2881
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,512,1,0,4.4147
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,0,0.7948
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,4,1,0,0.7946
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,8,1,0,0.8011
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,16,1,0,0.8594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,32,1,0,1.0982
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,64,1,0,1.8244
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,128,1,0,2.2847
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,256,1,0,4.4033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,0,0.7984
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,4,1,0,0.7979
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,8,1,0,0.8772
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,16,1,0,1.0955
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,32,1,0,1.8252
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,64,1,0,2.2875
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA B200,dsa_context_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,128,1,0,4.4177
