framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,1,0.1449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,4,0.1448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,8,0.1451
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,16,0.1448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,32,0.1453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,64,0.1476
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,128,0.1478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,256,0.1483
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,512,0.1490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,1024,0.1510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,2048,0.1618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,4096,0.1648
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,8192,0.1664
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,16384,0.1683
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,1,0.1465
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,4,0.1464
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,8,0.1458
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,16,0.1462
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,32,0.1461
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,64,0.1491
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,128,0.1494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,256,0.1500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,512,0.1506
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,1024,0.1534
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,2048,0.1607
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,4096,0.1645
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,8192,0.1650
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,16384,0.1686
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,1,0.1486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,4,0.1480
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,8,0.1481
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,16,0.1483
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,32,0.1481
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,64,0.1516
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,128,0.1519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,256,0.1527
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,512,0.1560
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,1024,0.1580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,2048,0.1672
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,4096,0.1708
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,8192,0.1721
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,16384,0.1763
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,1,0.1426
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,4,0.1426
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,8,0.1430
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,16,0.1425
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,32,0.1429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,64,0.1475
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,128,0.1479
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,256,0.1511
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,512,0.1517
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,1024,0.1548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,2048,0.1670
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,4096,0.1714
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,8192,0.1728
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,16384,0.1782
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,1,0.1449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,4,0.1445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,8,0.1450
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,16,0.1448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,32,0.1454
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,64,0.1518
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,128,0.1538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,256,0.1557
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,512,0.1567
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,1024,0.1631
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,2048,0.1788
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,4096,0.1790
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,8192,0.1861
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,16384,0.1927
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,1,0.1434
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,4,0.1453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,8,0.1436
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,16,0.1452
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,32,0.1436
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,64,0.1452
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,128,0.1577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,256,0.1620
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,512,0.1626
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,1024,0.1730
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,2048,0.1957
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,4096,0.1994
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,1,0.1492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,4,0.1489
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,8,0.1491
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,16,0.1494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,32,0.1495
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,64,0.1515
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,128,0.1529
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,256,0.1568
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,512,0.1636
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,1024,0.1793
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,2048,0.2138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,4096,0.2217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,1,0.1769
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,4,0.1771
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,8,0.1770
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,16,0.1778
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,32,0.1783
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,64,0.1829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,128,0.1868
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,256,0.1957
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,512,0.2121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,1024,0.2412
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,2048,0.3046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,1,0.2473
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,4,0.2478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,8,0.2486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,16,0.2482
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,32,0.2495
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,64,0.2557
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,128,0.2640
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,256,0.2797
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,512,0.3095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,1024,0.3678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,1,0.4080
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,4,0.4087
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,8,0.4099
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,16,0.4116
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,32,0.4120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,64,0.4217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,128,0.4368
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,256,0.4682
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,512,0.5278
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,1,0.7355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,4,0.7445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,8,0.7436
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,16,0.7508
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,32,0.7547
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,64,0.7615
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,128,0.7943
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,256,0.8523
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,1,0.1159
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,4,0.1159
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,8,0.1155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,16,0.1156
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,32,0.1159
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,64,0.1182
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,128,0.1190
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,256,0.1190
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,512,0.1197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,1024,0.1205
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,2048,0.1249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,4096,0.1325
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,8192,0.1335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,16384,0.1360
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,1,0.1147
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,4,0.1148
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,8,0.1149
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,16,0.1148
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,32,0.1146
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,64,0.1170
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,128,0.1177
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,256,0.1179
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,512,0.1185
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,1024,0.1200
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,2048,0.1287
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,4096,0.1324
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,8192,0.1331
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,16384,0.1362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,1,0.1169
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,4,0.1161
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,8,0.1169
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,16,0.1169
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,32,0.1166
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,64,0.1192
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,128,0.1205
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,256,0.1201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,512,0.1208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,1024,0.1239
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,2048,0.1327
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,4096,0.1366
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,8192,0.1386
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,16384,0.1411
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,1,0.1132
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,4,0.1135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,8,0.1133
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,16,0.1133
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,32,0.1134
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,64,0.1162
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,128,0.1173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,256,0.1177
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,512,0.1225
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,1024,0.1239
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,2048,0.1353
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,4096,0.1394
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,8192,0.1410
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,16384,0.1447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,1,0.1124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,4,0.1127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,8,0.1124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,16,0.1130
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,32,0.1130
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,64,0.1173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,128,0.1188
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,256,0.1208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,512,0.1218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,1024,0.1261
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,2048,0.1391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,4096,0.1431
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,8192,0.1466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,16384,0.1536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,1,0.1128
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,4,0.1129
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,8,0.1129
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,16,0.1130
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,32,0.1132
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,64,0.1206
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,128,0.1243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,256,0.1252
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,512,0.1299
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,1024,0.1355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,2048,0.1536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,4096,0.1591
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,1,0.1174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,4,0.1173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,8,0.1174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,16,0.1171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,32,0.1176
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,64,0.1203
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,128,0.1321
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,256,0.1355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,512,0.1411
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,1024,0.1532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,2048,0.1801
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,4096,0.1885
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,1,0.1330
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,4,0.1333
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,8,0.1336
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,16,0.1338
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,32,0.1339
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,64,0.1370
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,128,0.1396
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,256,0.1459
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,512,0.1576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,1024,0.1787
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,2048,0.2229
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,1,0.1694
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,4,0.1700
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,8,0.1700
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,16,0.1711
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,32,0.1715
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,64,0.1770
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,128,0.1829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,256,0.1959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,512,0.2181
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,1024,0.2588
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,1,0.2685
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,4,0.2702
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,8,0.2697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,16,0.2707
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,32,0.2727
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,64,0.2820
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,128,0.2922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,256,0.3155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,512,0.3581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,1,0.4534
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,4,0.4559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,8,0.4554
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,16,0.4593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,32,0.4602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,64,0.4724
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,128,0.4924
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,256,0.5351
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,1,0.1009
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,4,0.1004
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,8,0.1006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,16,0.1013
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,32,0.1008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,64,0.1027
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,128,0.1028
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,256,0.1032
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,512,0.1044
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,1024,0.1046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,2048,0.1102
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,4096,0.1167
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,8192,0.1183
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,16384,0.1214
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,1,0.1005
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,4,0.1003
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,8,0.1005
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,16,0.1006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,32,0.1006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,64,0.1027
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,128,0.1026
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,256,0.1024
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,512,0.1034
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,1024,0.1043
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,2048,0.1103
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,4096,0.1169
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,8192,0.1180
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,16384,0.1208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,1,0.1011
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,4,0.1012
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,8,0.1005
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,16,0.1007
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,32,0.1017
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,64,0.1029
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,128,0.1032
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,256,0.1036
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,512,0.1050
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,1024,0.1059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,2048,0.1164
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,4096,0.1192
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,8192,0.1214
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,16384,0.1234
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,1,0.0968
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,4,0.0968
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,8,0.0967
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,16,0.0971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,32,0.0972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,64,0.0999
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,128,0.1008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,256,0.1008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,512,0.1030
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,1024,0.1048
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,2048,0.1160
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,4096,0.1199
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,8192,0.1217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,16384,0.1259
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,1,0.0965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,4,0.0969
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,8,0.0971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,16,0.0972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,32,0.0970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,64,0.1009
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,128,0.1013
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,256,0.1034
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,512,0.1050
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,1024,0.1086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,2048,0.1224
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,4096,0.1273
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,8192,0.1298
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,16384,0.1372
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,1,0.0991
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,4,0.0990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,8,0.0992
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,16,0.0996
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,32,0.0992
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,64,0.1034
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,128,0.1050
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,256,0.1074
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,512,0.1104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,1024,0.1161
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,2048,0.1332
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,4096,0.1387
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,1,0.1019
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,4,0.1019
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,8,0.1018
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,16,0.1020
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,32,0.1022
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,64,0.1048
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,128,0.1122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,256,0.1133
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,512,0.1195
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,1024,0.1304
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,2048,0.1576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,4096,0.1659
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,1,0.1145
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,4,0.1145
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,8,0.1147
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,16,0.1149
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,32,0.1152
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,64,0.1181
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,128,0.1216
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,256,0.1277
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,512,0.1388
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,1024,0.1584
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,2048,0.2038
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,1,0.1416
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,4,0.1418
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,8,0.1419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,16,0.1420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,32,0.1434
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,64,0.1480
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,128,0.1555
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,256,0.1671
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,512,0.1886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,1024,0.2282
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,1,0.2074
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,4,0.2079
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,8,0.2081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,16,0.2094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,32,0.2117
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,64,0.2221
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,128,0.2334
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,256,0.2554
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,512,0.2979
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,1,0.3348
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,4,0.3366
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,8,0.3367
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,16,0.3385
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,32,0.3420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,64,0.3579
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,128,0.3787
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,256,0.4214
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,1,0.0847
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,4,0.0845
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,8,0.0848
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,16,0.0844
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,32,0.0849
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,64,0.0862
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,128,0.0870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,256,0.0876
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,512,0.0888
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,1024,0.0891
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,2048,0.0990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,4096,0.1027
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,8192,0.1036
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,16384,0.1077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,1,0.0857
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,4,0.0854
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,8,0.0850
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,16,0.0850
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,32,0.0856
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,64,0.0870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,128,0.0871
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,256,0.0888
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,512,0.0895
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,1024,0.0918
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,2048,0.0996
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,4096,0.1059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,8192,0.1077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,16384,0.1117
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,1,0.0862
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,4,0.0867
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,8,0.0865
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,16,0.0865
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,32,0.0868
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,64,0.0886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,128,0.0886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,256,0.0897
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,512,0.0934
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,1024,0.0960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,2048,0.1069
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,4096,0.1105
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,8192,0.1125
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,16384,0.1153
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,1,0.0833
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,4,0.0831
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,8,0.0832
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,16,0.0833
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,32,0.0835
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,64,0.0868
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,128,0.0871
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,256,0.0902
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,512,0.0925
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,1024,0.0951
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,2048,0.1063
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,4096,0.1109
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,8192,0.1122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,16384,0.1174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,1,0.0844
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,4,0.0840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,8,0.0837
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,16,0.0840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,32,0.0839
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,64,0.0879
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,128,0.0893
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,256,0.0929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,512,0.0956
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,1024,0.0993
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,2048,0.1136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,4096,0.1179
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,8192,0.1210
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,16384,0.1273
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,1,0.0870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,4,0.0872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,8,0.0862
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,16,0.0873
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,32,0.0879
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,64,0.0930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,128,0.0961
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,256,0.0987
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,512,0.1020
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,1024,0.1076
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,2048,0.1243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,4096,0.1301
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,1,0.0916
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,4,0.0919
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,8,0.0913
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,16,0.0925
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,32,0.0935
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,64,0.0968
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,128,0.1013
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,256,0.1044
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,512,0.1098
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,1024,0.1199
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,2048,0.1469
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,4096,0.1557
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,1,0.1041
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,4,0.1046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,8,0.1045
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,16,0.1053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,32,0.1054
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,64,0.1084
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,128,0.1110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,256,0.1175
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,512,0.1280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,1024,0.1485
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,2048,0.1936
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,1,0.1277
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,4,0.1277
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,8,0.1284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,16,0.1280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,32,0.1288
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,64,0.1331
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,128,0.1400
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,256,0.1510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,512,0.1719
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,1024,0.2114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,1,0.1826
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,4,0.1834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,8,0.1833
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,16,0.1835
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,32,0.1852
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,64,0.1951
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,128,0.2055
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,256,0.2284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,512,0.2706
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,1,0.2754
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,4,0.2770
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,8,0.2772
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,16,0.2793
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,32,0.2825
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,64,0.2991
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,128,0.3199
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,256,0.3623
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,1,0.0832
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,4,0.0831
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,8,0.0831
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,16,0.0832
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,32,0.0831
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,64,0.0842
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,128,0.0844
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,256,0.0854
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,512,0.0851
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,1024,0.0857
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,2048,0.0924
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,4096,0.0986
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,8192,0.0987
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,16384,0.1024
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,1,0.0817
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,4,0.0815
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,8,0.0817
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,16,0.0816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,32,0.0818
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,64,0.0830
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,128,0.0829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,256,0.0834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,512,0.0840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,1024,0.0853
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,2048,0.0915
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,4096,0.0991
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,8192,0.0996
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,16384,0.1031
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,1,0.0822
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,4,0.0819
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,8,0.0821
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,16,0.0822
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,32,0.0819
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,64,0.0830
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,128,0.0833
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,256,0.0838
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,512,0.0858
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,1024,0.0868
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,2048,0.0953
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,4096,0.1017
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,8192,0.1044
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,16384,0.1066
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,1,0.0778
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,4,0.0777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,8,0.0777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,16,0.0778
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,32,0.0779
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,64,0.0794
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,128,0.0800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,256,0.0814
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,512,0.0825
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,1024,0.0858
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,2048,0.0990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,4096,0.1047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,8192,0.1068
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,16384,0.1130
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,1,0.0791
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,4,0.0790
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,8,0.0792
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,16,0.0791
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,32,0.0794
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,64,0.0811
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,128,0.0823
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,256,0.0828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,512,0.0869
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,1024,0.0930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,2048,0.1077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,4096,0.1124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,8192,0.1158
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,16384,0.1230
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,1,0.0809
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,4,0.0799
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,8,0.0809
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,16,0.0800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,32,0.0810
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,64,0.0836
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,128,0.0875
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,256,0.0892
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,512,0.0959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,1024,0.1019
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,2048,0.1187
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,4096,0.1246
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,1,0.0836
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,4,0.0833
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,8,0.0836
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,16,0.0840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,32,0.0846
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,64,0.0865
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,128,0.0929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,256,0.0980
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,512,0.1044
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,1024,0.1154
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,2048,0.1419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,4096,0.1506
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,1,0.0965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,4,0.0966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,8,0.0967
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,16,0.0971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,32,0.0987
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,64,0.1024
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,128,0.1076
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,256,0.1138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,512,0.1243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,1024,0.1438
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,2048,0.1889
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,1,0.1183
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,4,0.1186
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,8,0.1191
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,16,0.1199
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,32,0.1209
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,64,0.1261
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,128,0.1329
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,256,0.1433
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,512,0.1646
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,1024,0.2032
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,1,0.1680
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,4,0.1690
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,8,0.1689
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,16,0.1693
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,32,0.1702
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,64,0.1797
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,128,0.1916
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,256,0.2129
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,512,0.2539
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,1,0.2516
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,4,0.2522
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,8,0.2531
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,16,0.2540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,32,0.2566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,64,0.2740
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,128,0.2948
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,256,0.3350
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,1,0.1541
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,4,0.1544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,8,0.1541
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,16,0.1536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,32,0.1538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,64,0.1543
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,128,0.1544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,256,0.1544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,512,0.1549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,1024,0.1551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,2048,0.1670
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,4096,0.1708
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,8192,0.1706
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,16384,0.1733
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,1,0.1532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,4,0.1533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,8,0.1532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,16,0.1532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,32,0.1529
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,64,0.1533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,128,0.1537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,256,0.1539
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,512,0.1540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,1024,0.1541
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,2048,0.1603
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,4096,0.1679
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,8192,0.1675
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,16384,0.1703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,1,0.1582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,4,0.1582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,8,0.1576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,16,0.1581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,32,0.1581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,64,0.1578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,128,0.1588
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,256,0.1589
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,512,0.1591
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,1024,0.1591
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,2048,0.1661
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,4096,0.1693
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,8192,0.1708
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,16384,0.1744
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,1,0.1597
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,4,0.1597
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,8,0.1595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,16,0.1601
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,32,0.1597
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,64,0.1604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,128,0.1599
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,256,0.1609
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,512,0.1610
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,1024,0.1616
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,2048,0.1710
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,4096,0.1750
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,8192,0.1771
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,16384,0.1797
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,1,0.1702
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,4,0.1703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,8,0.1702
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,16,0.1703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,32,0.1710
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,64,0.1702
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,128,0.1710
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,256,0.1709
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,512,0.1705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,1024,0.1720
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,2048,0.1818
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,4096,0.1864
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,8192,0.1882
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,16384,0.1965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,1,0.1884
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,4,0.1876
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,8,0.1884
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,16,0.1888
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,32,0.1886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,64,0.1891
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,128,0.1894
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,256,0.1904
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,512,0.1917
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,1024,0.1950
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,2048,0.2025
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,4096,0.2085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,1,0.2125
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,4,0.2118
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,8,0.2121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,16,0.2121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,32,0.2122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,64,0.2130
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,128,0.2139
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,256,0.2141
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,512,0.2152
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,1024,0.2186
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,2048,0.2346
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,4096,0.2433
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,1,0.3007
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,4,0.2996
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,8,0.3004
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,16,0.3010
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,32,0.3023
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,64,0.3026
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,128,0.3046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,256,0.3063
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,512,0.3101
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,1024,0.3187
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,2048,0.3443
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,1,0.4939
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,4,0.4924
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,8,0.4927
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,16,0.4932
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,32,0.4963
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,64,0.4979
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,128,0.4986
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,256,0.5043
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,512,0.5126
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,1024,0.5299
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,1,0.8974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,4,0.8964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,8,0.8975
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,16,0.8999
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,32,0.9022
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,64,0.9030
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,128,0.9050
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,256,0.9135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,512,0.9375
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,1,1.6886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,4,1.6889
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,8,1.6906
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,16,1.6952
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,32,1.6974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,64,1.7011
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,128,1.7094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,256,1.7276
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,1,0.1219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,4,0.1221
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,8,0.1218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,16,0.1218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,32,0.1220
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,64,0.1227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,128,0.1222
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,256,0.1228
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,512,0.1232
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,1024,0.1233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,2048,0.1274
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,4096,0.1351
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,8192,0.1357
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,16384,0.1381
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,1,0.1241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,4,0.1238
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,8,0.1241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,16,0.1241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,32,0.1241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,64,0.1243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,128,0.1243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,256,0.1248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,512,0.1248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,1024,0.1256
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,2048,0.1294
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,4096,0.1364
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,8192,0.1375
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,16384,0.1403
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,1,0.1282
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,4,0.1280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,8,0.1279
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,16,0.1283
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,32,0.1286
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,64,0.1281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,128,0.1291
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,256,0.1290
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,512,0.1295
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,1024,0.1296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,2048,0.1343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,4096,0.1373
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,8192,0.1392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,16384,0.1411
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,1,0.1244
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,4,0.1242
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,8,0.1244
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,16,0.1248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,32,0.1256
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,64,0.1253
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,128,0.1260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,256,0.1262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,512,0.1265
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,1024,0.1265
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,2048,0.1342
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,4096,0.1391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,8192,0.1400
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,16384,0.1450
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,1,0.1293
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,4,0.1294
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,8,0.1293
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,16,0.1295
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,32,0.1298
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,64,0.1298
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,128,0.1305
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,256,0.1309
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,512,0.1318
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,1024,0.1323
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,2048,0.1428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,4096,0.1468
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,8192,0.1504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,16384,0.1570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,1,0.1438
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,4,0.1442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,8,0.1442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,16,0.1442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,32,0.1439
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,64,0.1443
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,128,0.1448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,256,0.1444
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,512,0.1461
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,1024,0.1479
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,2048,0.1606
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,4096,0.1674
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,1,0.1672
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,4,0.1673
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,8,0.1672
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,16,0.1670
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,32,0.1675
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,64,0.1677
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,128,0.1682
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,256,0.1700
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,512,0.1729
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,1024,0.1817
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,2048,0.1939
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,4096,0.2014
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,1,0.2065
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,4,0.2072
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,8,0.2066
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,16,0.2074
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,32,0.2075
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,64,0.2083
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,128,0.2094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,256,0.2122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,512,0.2179
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,1024,0.2266
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,2048,0.2502
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,1,0.3151
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,4,0.3148
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,8,0.3154
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,16,0.3157
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,32,0.3167
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,64,0.3188
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,128,0.3204
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,256,0.3255
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,512,0.3371
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,1024,0.3540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,1,0.5560
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,4,0.5564
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,8,0.5569
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,16,0.5590
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,32,0.5613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,64,0.5642
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,128,0.5696
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,256,0.5831
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,512,0.6018
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,1,1.0336
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,4,1.0352
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,8,1.0331
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,16,1.0351
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,32,1.0377
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,64,1.0428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,128,1.0563
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,256,1.0683
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,1,0.1085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,4,0.1089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,8,0.1086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,16,0.1089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,32,0.1086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,64,0.1094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,128,0.1096
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,256,0.1097
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,512,0.1096
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,1024,0.1095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,2048,0.1133
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,4096,0.1211
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,8192,0.1210
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,16384,0.1232
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,1,0.1136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,4,0.1137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,8,0.1134
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,16,0.1136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,32,0.1134
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,64,0.1138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,128,0.1139
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,256,0.1143
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,512,0.1142
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,1024,0.1147
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,2048,0.1189
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,4096,0.1257
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,8192,0.1269
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,16384,0.1295
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,1,0.1133
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,4,0.1133
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,8,0.1132
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,16,0.1133
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,32,0.1136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,64,0.1140
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,128,0.1142
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,256,0.1146
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,512,0.1148
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,1024,0.1152
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,2048,0.1232
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,4096,0.1271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,8192,0.1280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,16384,0.1322
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,1,0.1127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,4,0.1131
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,8,0.1127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,16,0.1129
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,32,0.1128
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,64,0.1131
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,128,0.1137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,256,0.1145
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,512,0.1148
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,1024,0.1150
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,2048,0.1233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,4096,0.1277
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,8192,0.1297
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,16384,0.1338
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,1,0.1198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,4,0.1199
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,8,0.1201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,16,0.1205
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,32,0.1203
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,64,0.1205
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,128,0.1208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,256,0.1210
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,512,0.1227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,1024,0.1227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,2048,0.1334
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,4096,0.1375
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,8192,0.1410
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,16384,0.1482
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,1,0.1341
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,4,0.1340
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,8,0.1351
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,16,0.1346
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,32,0.1350
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,64,0.1351
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,128,0.1355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,256,0.1340
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,512,0.1366
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,1024,0.1392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,2048,0.1510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,4096,0.1569
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,1,0.1586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,4,0.1584
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,8,0.1582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,16,0.1582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,32,0.1589
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,64,0.1593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,128,0.1604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,256,0.1617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,512,0.1649
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,1024,0.1728
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,2048,0.1856
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,4096,0.1937
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,1,0.1985
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,4,0.1981
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,8,0.1990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,16,0.1994
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,32,0.1998
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,64,0.2007
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,128,0.2010
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,256,0.2036
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,512,0.2090
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,1024,0.2173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,2048,0.2410
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,1,0.3042
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,4,0.3048
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,8,0.3061
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,16,0.3060
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,32,0.3065
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,64,0.3082
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,128,0.3100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,256,0.3155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,512,0.3265
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,1024,0.3440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,1,0.5364
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,4,0.5378
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,8,0.5382
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,16,0.5380
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,32,0.5401
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,64,0.5437
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,128,0.5489
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,256,0.5604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,512,0.5768
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,1,0.9971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,4,0.9970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,8,0.9949
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,16,0.9988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,32,1.0008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,64,1.0063
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,128,1.0171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,256,1.0296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,1,0.0975
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,4,0.0969
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,8,0.0969
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,16,0.0972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,32,0.0967
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,64,0.0974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,128,0.0977
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,256,0.0983
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,512,0.0983
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,1024,0.0986
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,2048,0.1035
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,4096,0.1099
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,8192,0.1104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,16384,0.1141
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,1,0.1053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,4,0.1051
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,8,0.1055
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,16,0.1055
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,32,0.1052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,64,0.1061
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,128,0.1056
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,256,0.1063
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,512,0.1065
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,1024,0.1073
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,2048,0.1129
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,4096,0.1193
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,8192,0.1204
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,16384,0.1235
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,1,0.1043
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,4,0.1039
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,8,0.1040
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,16,0.1038
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,32,0.1039
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,64,0.1047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,128,0.1052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,256,0.1060
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,512,0.1066
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,1024,0.1066
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,2048,0.1161
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,4096,0.1193
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,8192,0.1208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,16384,0.1247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,1,0.1058
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,4,0.1064
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,8,0.1059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,16,0.1061
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,32,0.1062
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,64,0.1068
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,128,0.1070
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,256,0.1081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,512,0.1081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,1024,0.1085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,2048,0.1174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,4096,0.1218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,8192,0.1238
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,16384,0.1284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,1,0.1121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,4,0.1119
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,8,0.1125
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,16,0.1122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,32,0.1124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,64,0.1130
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,128,0.1134
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,256,0.1140
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,512,0.1150
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,1024,0.1160
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,2048,0.1260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,4096,0.1309
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,8192,0.1341
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,16384,0.1414
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,1,0.1267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,4,0.1267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,8,0.1267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,16,0.1264
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,32,0.1268
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,64,0.1269
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,128,0.1276
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,256,0.1269
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,512,0.1294
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,1024,0.1317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,2048,0.1443
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,4096,0.1502
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,1,0.1492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,4,0.1491
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,8,0.1487
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,16,0.1489
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,32,0.1498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,64,0.1500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,128,0.1511
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,256,0.1534
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,512,0.1562
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,1024,0.1637
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,2048,0.1767
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,4096,0.1847
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,1,0.1863
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,4,0.1870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,8,0.1871
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,16,0.1864
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,32,0.1873
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,64,0.1885
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,128,0.1896
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,256,0.1929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,512,0.1970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,1024,0.2062
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,2048,0.2290
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,1,0.2875
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,4,0.2877
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,8,0.2884
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,16,0.2891
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,32,0.2898
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,64,0.2914
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,128,0.2941
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,256,0.2993
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,512,0.3089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,1024,0.3263
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,1,0.5002
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,4,0.5010
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,8,0.5018
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,16,0.5026
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,32,0.5039
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,64,0.5086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,128,0.5129
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,256,0.5247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,512,0.5425
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,1,0.9182
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,4,0.9188
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,8,0.9197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,16,0.9185
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,32,0.9250
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,64,0.9306
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,128,0.9371
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,256,0.9536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,1,0.0930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,4,0.0930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,8,0.0929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,16,0.0929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,32,0.0929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,64,0.0928
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,128,0.0928
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,256,0.0931
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,512,0.0931
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,1024,0.0933
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,2048,0.0979
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,4096,0.1052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,8192,0.1066
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,16384,0.1095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,1,0.0962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,4,0.0960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,8,0.0959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,16,0.0959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,32,0.0959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,64,0.0962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,128,0.0959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,256,0.0959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,512,0.0961
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,1024,0.0964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,2048,0.1050
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,4096,0.1093
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,8192,0.1105
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,16384,0.1140
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,1,0.0974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,4,0.0971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,8,0.0976
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,16,0.0970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,32,0.0972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,64,0.0973
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,128,0.0969
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,256,0.0972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,512,0.0978
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,1024,0.0987
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,2048,0.1076
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,4096,0.1124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,8192,0.1137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,16384,0.1185
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,1,0.0956
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,4,0.0966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,8,0.0959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,16,0.0962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,32,0.0957
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,64,0.0964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,128,0.0971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,256,0.0980
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,512,0.0986
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,1024,0.1015
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,2048,0.1120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,4096,0.1170
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,8192,0.1194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,16384,0.1241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,1,0.1050
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,4,0.1052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,8,0.1052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,16,0.1049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,32,0.1058
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,64,0.1056
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,128,0.1070
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,256,0.1078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,512,0.1102
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,1024,0.1115
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,2048,0.1230
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,4096,0.1271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,8192,0.1305
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,16384,0.1374
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,1,0.1211
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,4,0.1209
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,8,0.1211
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,16,0.1213
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,32,0.1210
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,64,0.1219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,128,0.1225
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,256,0.1219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,512,0.1252
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,1024,0.1275
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,2048,0.1407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,4096,0.1465
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,1,0.1442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,4,0.1443
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,8,0.1441
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,16,0.1447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,32,0.1447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,64,0.1459
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,128,0.1466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,256,0.1487
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,512,0.1518
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,1024,0.1600
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,2048,0.1728
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,4096,0.1811
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,1,0.1801
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,4,0.1803
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,8,0.1800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,16,0.1807
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,32,0.1813
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,64,0.1828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,128,0.1846
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,256,0.1873
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,512,0.1922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,1024,0.1995
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,2048,0.2247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,1,0.2789
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,4,0.2784
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,8,0.2788
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,16,0.2794
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,32,0.2803
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,64,0.2828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,128,0.2847
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,256,0.2901
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,512,0.2990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,1024,0.3161
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,1,0.4817
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,4,0.4815
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,8,0.4824
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,16,0.4838
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,32,0.4866
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,64,0.4909
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,128,0.4951
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,256,0.5061
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,512,0.5243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,1,0.8800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,4,0.8827
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,8,0.8845
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,16,0.8832
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,32,0.8859
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,64,0.8934
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,128,0.9000
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,256,0.9190
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,1,0.1449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,4,0.1443
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,8,0.1447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,16,0.1446
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,32,0.1445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,64,0.1472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,128,0.1483
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,256,0.1485
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,512,0.1482
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,1024,0.1501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,2048,0.1555
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,4096,0.1648
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,8192,0.1649
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,16384,0.1674
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,1,0.1464
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,4,0.1462
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,8,0.1461
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,16,0.1460
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,32,0.1463
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,64,0.1491
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,128,0.1497
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,256,0.1504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,512,0.1512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,1024,0.1534
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,2048,0.1604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,4096,0.1642
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,8192,0.1653
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,16384,0.1683
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,1,0.1482
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,4,0.1482
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,8,0.1486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,16,0.1484
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,32,0.1486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,64,0.1513
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,128,0.1519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,256,0.1526
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,512,0.1557
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,1024,0.1576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,2048,0.1655
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,4096,0.1713
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,8192,0.1732
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,16384,0.1762
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,1,0.1427
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,4,0.1425
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,8,0.1428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,16,0.1429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,32,0.1430
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,64,0.1470
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,128,0.1480
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,256,0.1505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,512,0.1521
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,1024,0.1549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,2048,0.1667
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,4096,0.1709
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,8192,0.1728
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,16384,0.1777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,1,0.1436
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,4,0.1449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,8,0.1453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,16,0.1448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,32,0.1451
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,64,0.1519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,128,0.1540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,256,0.1554
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,512,0.1586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,1024,0.1628
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,2048,0.1786
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,4096,0.1797
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,8192,0.1865
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,16384,0.1926
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,1,0.1429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,4,0.1449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,8,0.1430
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,16,0.1430
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,32,0.1438
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,64,0.1454
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,128,0.1576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,256,0.1617
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,512,0.1625
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,1024,0.1723
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,2048,0.1957
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,4096,0.1993
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,1,0.1490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,4,0.1494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,8,0.1497
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,16,0.1489
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,32,0.1495
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,64,0.1517
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,128,0.1535
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,256,0.1570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,512,0.1643
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,1024,0.1787
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,2048,0.2139
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,4096,0.2223
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,1,0.1767
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,4,0.1777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,8,0.1780
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,16,0.1781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,32,0.1782
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,64,0.1828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,128,0.1871
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,256,0.1959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,512,0.2119
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,1024,0.2412
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,2048,0.3044
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,1,0.2464
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,4,0.2474
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,8,0.2479
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,16,0.2482
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,32,0.2493
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,64,0.2557
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,128,0.2645
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,256,0.2792
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,512,0.3095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,1024,0.3669
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,1,0.4068
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,4,0.4095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,8,0.4105
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,16,0.4117
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,32,0.4134
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,64,0.4213
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,128,0.4368
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,256,0.4681
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,512,0.5264
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,1,0.7408
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,4,0.7299
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,8,0.7447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,16,0.7522
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,32,0.7540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,64,0.7705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,128,0.7948
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,256,0.8433
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,1,0.1153
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,4,0.1159
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,8,0.1151
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,16,0.1151
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,32,0.1155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,64,0.1181
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,128,0.1192
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,256,0.1189
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,512,0.1196
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,1024,0.1204
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,2048,0.1283
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,4096,0.1320
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,8192,0.1333
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,16384,0.1357
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,1,0.1145
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,4,0.1147
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,8,0.1148
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,16,0.1149
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,32,0.1146
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,64,0.1169
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,128,0.1176
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,256,0.1174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,512,0.1185
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,1024,0.1200
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,2048,0.1291
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,4096,0.1322
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,8192,0.1321
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,16384,0.1358
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,1,0.1163
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,4,0.1163
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,8,0.1163
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,16,0.1169
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,32,0.1165
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,64,0.1190
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,128,0.1199
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,256,0.1201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,512,0.1212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,1024,0.1235
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,2048,0.1336
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,4096,0.1360
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,8192,0.1377
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,16384,0.1419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,1,0.1133
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,4,0.1131
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,8,0.1131
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,16,0.1131
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,32,0.1138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,64,0.1161
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,128,0.1179
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,256,0.1183
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,512,0.1215
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,1024,0.1230
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,2048,0.1350
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,4096,0.1388
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,8192,0.1408
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,16384,0.1447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,1,0.1118
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,4,0.1125
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,8,0.1121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,16,0.1125
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,32,0.1122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,64,0.1167
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,128,0.1186
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,256,0.1207
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,512,0.1219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,1024,0.1261
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,2048,0.1396
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,4096,0.1436
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,8192,0.1467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,16384,0.1533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,1,0.1131
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,4,0.1130
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,8,0.1131
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,16,0.1128
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,32,0.1130
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,64,0.1205
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,128,0.1244
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,256,0.1249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,512,0.1301
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,1024,0.1352
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,2048,0.1537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,4096,0.1593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,1,0.1175
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,4,0.1176
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,8,0.1174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,16,0.1176
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,32,0.1178
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,64,0.1202
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,128,0.1319
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,256,0.1354
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,512,0.1414
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,1024,0.1529
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,2048,0.1801
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,4096,0.1883
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,1,0.1338
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,4,0.1329
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,8,0.1339
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,16,0.1338
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,32,0.1341
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,64,0.1375
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,128,0.1400
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,256,0.1463
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,512,0.1573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,1024,0.1782
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,2048,0.2233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,1,0.1694
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,4,0.1700
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,8,0.1705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,16,0.1706
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,32,0.1722
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,64,0.1766
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,128,0.1836
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,256,0.1963
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,512,0.2178
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,1024,0.2586
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,1,0.2687
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,4,0.2704
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,8,0.2704
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,16,0.2712
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,32,0.2736
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,64,0.2822
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,128,0.2928
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,256,0.3162
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,512,0.3579
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,1,0.4536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,4,0.4561
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,8,0.4566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,16,0.4578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,32,0.4603
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,64,0.4738
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,128,0.4920
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,256,0.5347
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,1,0.1008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,4,0.1005
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,8,0.1005
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,16,0.1011
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,32,0.1008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,64,0.1026
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,128,0.1032
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,256,0.1034
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,512,0.1045
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,1024,0.1047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,2048,0.1100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,4096,0.1170
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,8192,0.1180
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,16384,0.1211
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,1,0.1006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,4,0.1005
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,8,0.1007
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,16,0.1006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,32,0.1005
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,64,0.1027
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,128,0.1026
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,256,0.1025
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,512,0.1035
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,1024,0.1043
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,2048,0.1102
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,4096,0.1172
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,8192,0.1171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,16384,0.1199
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,1,0.1008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,4,0.1008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,8,0.1006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,16,0.1007
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,32,0.1011
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,64,0.1033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,128,0.1032
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,256,0.1040
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,512,0.1049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,1024,0.1059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,2048,0.1162
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,4096,0.1192
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,8192,0.1212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,16384,0.1242
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,1,0.0968
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,4,0.0966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,8,0.0971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,16,0.0969
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,32,0.0971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,64,0.0994
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,128,0.1006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,256,0.1013
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,512,0.1033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,1024,0.1048
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,2048,0.1160
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,4096,0.1204
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,8192,0.1220
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,16384,0.1260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,1,0.0967
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,4,0.0973
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,8,0.0971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,16,0.0974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,32,0.0973
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,64,0.1005
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,128,0.1015
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,256,0.1038
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,512,0.1044
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,1024,0.1088
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,2048,0.1223
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,4096,0.1268
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,8192,0.1295
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,16384,0.1370
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,1,0.0992
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,4,0.0989
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,8,0.0988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,16,0.0991
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,32,0.0991
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,64,0.1028
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,128,0.1049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,256,0.1070
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,512,0.1100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,1024,0.1155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,2048,0.1336
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,4096,0.1379
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,1,0.1021
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,4,0.1019
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,8,0.1018
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,16,0.1024
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,32,0.1024
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,64,0.1052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,128,0.1121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,256,0.1133
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,512,0.1195
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,1024,0.1309
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,2048,0.1581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,4096,0.1656
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,1,0.1146
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,4,0.1147
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,8,0.1148
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,16,0.1149
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,32,0.1147
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,64,0.1182
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,128,0.1209
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,256,0.1275
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,512,0.1387
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,1024,0.1582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,2048,0.2038
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,1,0.1419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,4,0.1418
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,8,0.1423
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,16,0.1424
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,32,0.1429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,64,0.1477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,128,0.1551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,256,0.1675
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,512,0.1890
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,1024,0.2285
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,1,0.2071
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,4,0.2078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,8,0.2080
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,16,0.2097
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,32,0.2112
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,64,0.2216
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,128,0.2331
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,256,0.2556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,512,0.2980
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,1,0.3342
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,4,0.3362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,8,0.3373
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,16,0.3393
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,32,0.3415
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,64,0.3582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,128,0.3777
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,256,0.4207
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,1,0.0845
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,4,0.0842
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,8,0.0845
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,16,0.0842
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,32,0.0849
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,64,0.0861
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,128,0.0870
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,256,0.0874
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,512,0.0886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,1024,0.0892
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,2048,0.0956
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,4096,0.1026
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,8192,0.1034
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,16384,0.1074
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,1,0.0853
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,4,0.0851
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,8,0.0852
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,16,0.0848
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,32,0.0853
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,64,0.0872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,128,0.0872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,256,0.0885
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,512,0.0898
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,1024,0.0923
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,2048,0.1029
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,4096,0.1064
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,8192,0.1083
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,16384,0.1120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,1,0.0867
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,4,0.0865
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,8,0.0865
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,16,0.0866
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,32,0.0868
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,64,0.0887
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,128,0.0888
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,256,0.0896
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,512,0.0935
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,1024,0.0962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,2048,0.1063
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,4096,0.1106
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,8192,0.1127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,16384,0.1159
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,1,0.0834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,4,0.0830
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,8,0.0832
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,16,0.0832
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,32,0.0836
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,64,0.0866
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,128,0.0872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,256,0.0903
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,512,0.0928
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,1024,0.0952
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,2048,0.1065
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,4096,0.1109
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,8192,0.1122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,16384,0.1176
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,1,0.0845
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,4,0.0842
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,8,0.0836
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,16,0.0840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,32,0.0840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,64,0.0876
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,128,0.0893
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,256,0.0930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,512,0.0956
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,1024,0.0994
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,2048,0.1131
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,4096,0.1179
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,8192,0.1206
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,16384,0.1275
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,1,0.0859
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,4,0.0872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,8,0.0863
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,16,0.0876
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,32,0.0872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,64,0.0932
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,128,0.0961
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,256,0.0985
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,512,0.1021
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,1024,0.1076
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,2048,0.1249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,4096,0.1301
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,1,0.0911
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,4,0.0923
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,8,0.0920
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,16,0.0929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,32,0.0935
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,64,0.0969
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,128,0.1018
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,256,0.1046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,512,0.1097
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,1024,0.1207
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,2048,0.1473
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,4096,0.1556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,1,0.1038
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,4,0.1046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,8,0.1045
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,16,0.1047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,32,0.1052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,64,0.1085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,128,0.1114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,256,0.1172
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,512,0.1284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,1024,0.1485
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,2048,0.1930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,1,0.1277
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,4,0.1277
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,8,0.1283
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,16,0.1280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,32,0.1290
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,64,0.1333
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,128,0.1400
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,256,0.1512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,512,0.1724
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,1024,0.2118
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,1,0.1822
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,4,0.1828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,8,0.1828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,16,0.1832
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,32,0.1845
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,64,0.1949
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,128,0.2058
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,256,0.2286
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,512,0.2703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,1,0.2761
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,4,0.2778
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,8,0.2769
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,16,0.2800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,32,0.2830
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,64,0.2999
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,128,0.3197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,256,0.3615
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,1,0.0831
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,4,0.0828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,8,0.0829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,16,0.0830
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,32,0.0829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,64,0.0839
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,128,0.0844
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,256,0.0845
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,512,0.0850
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,1024,0.0856
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,2048,0.0917
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,4096,0.0985
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,8192,0.0999
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,16384,0.1022
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,1,0.0815
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,4,0.0815
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,8,0.0815
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,16,0.0815
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,32,0.0816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,64,0.0828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,128,0.0829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,256,0.0832
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,512,0.0841
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,1024,0.0855
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,2048,0.0949
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,4096,0.0990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,8192,0.0995
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,16384,0.1030
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,1,0.0819
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,4,0.0818
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,8,0.0820
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,16,0.0818
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,32,0.0820
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,64,0.0831
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,128,0.0836
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,256,0.0840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,512,0.0860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,1024,0.0869
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,2048,0.0950
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,4096,0.1016
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,8192,0.1040
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,16384,0.1077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,1,0.0780
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,4,0.0780
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,8,0.0779
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,16,0.0779
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,32,0.0781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,64,0.0795
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,128,0.0799
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,256,0.0814
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,512,0.0823
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,1024,0.0860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,2048,0.0990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,4096,0.1044
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,8192,0.1067
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,16384,0.1131
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,1,0.0791
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,4,0.0789
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,8,0.0789
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,16,0.0791
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,32,0.0795
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,64,0.0810
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,128,0.0826
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,256,0.0826
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,512,0.0866
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,1024,0.0930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,2048,0.1077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,4096,0.1125
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,8192,0.1156
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,16384,0.1227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,1,0.0807
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,4,0.0798
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,8,0.0807
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,16,0.0798
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,32,0.0808
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,64,0.0834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,128,0.0872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,256,0.0887
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,512,0.0960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,1024,0.1024
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,2048,0.1191
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,4096,0.1244
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,1,0.0835
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,4,0.0834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,8,0.0836
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,16,0.0839
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,32,0.0846
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,64,0.0867
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,128,0.0929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,256,0.0980
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,512,0.1048
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,1024,0.1154
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,2048,0.1423
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,4096,0.1506
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,1,0.0965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,4,0.0964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,8,0.0970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,16,0.0972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,32,0.0987
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,64,0.1024
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,128,0.1075
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,256,0.1138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,512,0.1242
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,1024,0.1438
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,2048,0.1888
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,1,0.1186
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,4,0.1186
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,8,0.1194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,16,0.1201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,32,0.1214
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,64,0.1262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,128,0.1325
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,256,0.1440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,512,0.1649
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,1024,0.2037
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,1,0.1682
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,4,0.1687
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,8,0.1691
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,16,0.1691
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,32,0.1706
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,64,0.1799
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,128,0.1910
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,256,0.2128
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,512,0.2543
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,1,0.2519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,4,0.2528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,8,0.2533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,16,0.2548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,32,0.2566
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,64,0.2747
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,128,0.2952
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,256,0.3355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,1,0.1537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,4,0.1539
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,8,0.1540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,16,0.1541
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,32,0.1540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,64,0.1544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,128,0.1542
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,256,0.1550
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,512,0.1547
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,1024,0.1553
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,2048,0.1614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,4096,0.1714
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,8192,0.1719
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,16384,0.1747
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,1,0.1535
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,4,0.1535
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,8,0.1531
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,16,0.1532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,32,0.1533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,64,0.1532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,128,0.1539
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,256,0.1541
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,512,0.1545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,1024,0.1543
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,2048,0.1604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,4096,0.1677
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,8192,0.1689
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,16384,0.1719
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,1,0.1576
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,4,0.1580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,8,0.1577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,16,0.1585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,32,0.1578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,64,0.1577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,128,0.1594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,256,0.1587
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,512,0.1590
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,1024,0.1594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,2048,0.1663
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,4096,0.1697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,8192,0.1712
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,16384,0.1736
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,1,0.1594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,4,0.1596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,8,0.1597
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,16,0.1596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,32,0.1598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,64,0.1601
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,128,0.1601
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,256,0.1607
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,512,0.1606
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,1024,0.1612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,2048,0.1708
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,4096,0.1749
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,8192,0.1765
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,16384,0.1800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,1,0.1697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,4,0.1707
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,8,0.1699
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,16,0.1704
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,32,0.1702
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,64,0.1706
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,128,0.1706
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,256,0.1713
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,512,0.1708
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,1024,0.1718
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,2048,0.1815
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,4096,0.1864
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,8192,0.1878
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,16384,0.1966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,1,0.1887
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,4,0.1883
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,8,0.1885
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,16,0.1886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,32,0.1884
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,64,0.1887
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,128,0.1891
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,256,0.1907
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,512,0.1913
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,1024,0.1949
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,2048,0.2016
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,4096,0.2081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,1,0.2121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,4,0.2121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,8,0.2124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,16,0.2129
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,32,0.2121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,64,0.2132
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,128,0.2131
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,256,0.2143
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,512,0.2158
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,1024,0.2183
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,2048,0.2341
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,4096,0.2428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,1,0.3012
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,4,0.3007
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,8,0.3006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,16,0.3010
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,32,0.3021
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,64,0.3041
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,128,0.3051
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,256,0.3072
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,512,0.3105
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,1024,0.3199
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,2048,0.3445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,1,0.4933
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,4,0.4942
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,8,0.4944
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,16,0.4944
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,32,0.4957
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,64,0.4984
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,128,0.5016
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,256,0.5051
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,512,0.5141
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,1024,0.5293
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,1,0.8965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,4,0.8983
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,8,0.8962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,16,0.8998
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,32,0.8993
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,64,0.8998
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,128,0.9089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,256,0.9178
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,512,0.9346
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,1,1.6890
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,4,1.6933
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,8,1.6943
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,16,1.6958
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,32,1.6969
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,64,1.7036
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,128,1.7105
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,256,1.7267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,1,0.1218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,4,0.1220
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,8,0.1219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,16,0.1219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,32,0.1221
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,64,0.1227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,128,0.1224
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,256,0.1229
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,512,0.1230
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,1024,0.1230
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,2048,0.1272
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,4096,0.1353
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,8192,0.1355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,16384,0.1367
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,1,0.1241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,4,0.1243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,8,0.1240
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,16,0.1243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,32,0.1239
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,64,0.1242
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,128,0.1242
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,256,0.1250
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,512,0.1246
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,1024,0.1256
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,2048,0.1330
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,4096,0.1366
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,8192,0.1375
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,16384,0.1402
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,1,0.1284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,4,0.1284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,8,0.1279
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,16,0.1284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,32,0.1282
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,64,0.1280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,128,0.1291
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,256,0.1289
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,512,0.1292
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,1024,0.1295
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,2048,0.1343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,4096,0.1377
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,8192,0.1392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,16384,0.1425
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,1,0.1248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,4,0.1247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,8,0.1247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,16,0.1249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,32,0.1253
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,64,0.1254
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,128,0.1260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,256,0.1260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,512,0.1263
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,1024,0.1267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,2048,0.1341
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,4096,0.1388
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,8192,0.1402
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,16384,0.1445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,1,0.1298
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,4,0.1297
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,8,0.1293
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,16,0.1297
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,32,0.1300
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,64,0.1302
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,128,0.1302
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,256,0.1305
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,512,0.1313
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,1024,0.1318
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,2048,0.1424
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,4096,0.1467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,8192,0.1504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,16384,0.1573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,1,0.1435
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,4,0.1439
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,8,0.1434
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,16,0.1441
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,32,0.1441
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,64,0.1446
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,128,0.1448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,256,0.1432
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,512,0.1460
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,1024,0.1480
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,2048,0.1612
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,4096,0.1666
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,1,0.1664
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,4,0.1664
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,8,0.1667
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,16,0.1669
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,32,0.1673
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,64,0.1683
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,128,0.1685
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,256,0.1699
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,512,0.1732
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,1024,0.1816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,2048,0.1940
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,4096,0.2023
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,1,0.2068
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,4,0.2063
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,8,0.2065
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,16,0.2073
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,32,0.2072
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,64,0.2083
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,128,0.2095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,256,0.2121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,512,0.2183
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,1024,0.2266
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,2048,0.2501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,1,0.3153
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,4,0.3145
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,8,0.3150
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,16,0.3154
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,32,0.3165
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,64,0.3186
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,128,0.3216
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,256,0.3256
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,512,0.3371
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,1024,0.3549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,1,0.5573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,4,0.5558
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,8,0.5563
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,16,0.5593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,32,0.5611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,64,0.5659
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,128,0.5724
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,256,0.5828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,512,0.6003
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,1,1.0340
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,4,1.0317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,8,1.0356
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,16,1.0365
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,32,1.0384
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,64,1.0465
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,128,1.0528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,256,1.0709
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,1,0.1087
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,4,0.1087
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,8,0.1084
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,16,0.1086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,32,0.1087
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,64,0.1093
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,128,0.1095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,256,0.1101
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,512,0.1101
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,1024,0.1098
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,2048,0.1145
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,4096,0.1215
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,8192,0.1220
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,16384,0.1248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,1,0.1135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,4,0.1134
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,8,0.1135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,16,0.1138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,32,0.1137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,64,0.1140
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,128,0.1140
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,256,0.1143
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,512,0.1143
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,1024,0.1145
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,2048,0.1190
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,4096,0.1259
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,8192,0.1271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,16384,0.1296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,1,0.1136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,4,0.1135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,8,0.1136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,16,0.1136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,32,0.1137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,64,0.1142
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,128,0.1146
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,256,0.1148
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,512,0.1153
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,1024,0.1155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,2048,0.1201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,4096,0.1271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,8192,0.1285
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,16384,0.1322
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,1,0.1128
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,4,0.1131
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,8,0.1129
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,16,0.1128
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,32,0.1127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,64,0.1131
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,128,0.1142
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,256,0.1148
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,512,0.1148
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,1024,0.1152
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,2048,0.1233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,4096,0.1280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,8192,0.1294
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,16384,0.1338
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,1,0.1201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,4,0.1201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,8,0.1199
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,16,0.1204
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,32,0.1199
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,64,0.1205
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,128,0.1206
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,256,0.1217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,512,0.1224
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,1024,0.1227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,2048,0.1335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,4096,0.1378
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,8192,0.1409
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,16384,0.1481
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,1,0.1347
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,4,0.1341
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,8,0.1349
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,16,0.1343
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,32,0.1347
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,64,0.1350
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,128,0.1355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,256,0.1347
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,512,0.1364
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,1024,0.1395
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,2048,0.1515
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,4096,0.1571
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,1,0.1585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,4,0.1580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,8,0.1583
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,16,0.1585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,32,0.1593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,64,0.1595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,128,0.1604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,256,0.1621
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,512,0.1648
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,1024,0.1728
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,2048,0.1852
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,4096,0.1944
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,1,0.1986
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,4,0.1986
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,8,0.1987
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,16,0.1994
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,32,0.2001
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,64,0.1999
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,128,0.2010
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,256,0.2038
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,512,0.2089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,1024,0.2174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,2048,0.2408
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,1,0.3047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,4,0.3050
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,8,0.3052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,16,0.3053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,32,0.3061
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,64,0.3083
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,128,0.3111
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,256,0.3172
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,512,0.3260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,1024,0.3433
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,1,0.5367
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,4,0.5363
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,8,0.5375
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,16,0.5376
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,32,0.5396
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,64,0.5440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,128,0.5508
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,256,0.5604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,512,0.5775
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,1,0.9997
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,4,0.9963
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,8,0.9979
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,16,0.9972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,32,0.9990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,64,1.0082
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,128,1.0134
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,256,1.0311
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,1,0.0971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,4,0.0970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,8,0.0970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,16,0.0970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,32,0.0965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,64,0.0972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,128,0.0976
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,256,0.0981
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,512,0.0977
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,1024,0.0984
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,2048,0.1034
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,4096,0.1104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,8192,0.1120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,16384,0.1143
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,1,0.1055
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,4,0.1054
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,8,0.1053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,16,0.1054
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,32,0.1053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,64,0.1059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,128,0.1057
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,256,0.1063
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,512,0.1065
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,1024,0.1074
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,2048,0.1159
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,4096,0.1191
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,8192,0.1202
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,16384,0.1237
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,1,0.1041
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,4,0.1042
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,8,0.1042
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,16,0.1039
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,32,0.1040
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,64,0.1047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,128,0.1052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,256,0.1059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,512,0.1065
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,1024,0.1066
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,2048,0.1158
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,4096,0.1193
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,8192,0.1206
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,16384,0.1248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,1,0.1057
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,4,0.1060
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,8,0.1059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,16,0.1061
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,32,0.1062
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,64,0.1066
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,128,0.1068
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,256,0.1080
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,512,0.1081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,1024,0.1085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,2048,0.1143
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,4096,0.1222
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,8192,0.1235
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,16384,0.1284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,1,0.1119
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,4,0.1122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,8,0.1122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,16,0.1123
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,32,0.1123
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,64,0.1126
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,128,0.1136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,256,0.1142
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,512,0.1151
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,1024,0.1160
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,2048,0.1264
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,4096,0.1311
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,8192,0.1342
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,16384,0.1409
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,1,0.1267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,4,0.1266
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,8,0.1269
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,16,0.1267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,32,0.1270
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,64,0.1271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,128,0.1275
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,256,0.1268
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,512,0.1296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,1024,0.1317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,2048,0.1445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,4096,0.1499
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,1,0.1491
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,4,0.1490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,8,0.1487
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,16,0.1498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,32,0.1494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,64,0.1504
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,128,0.1513
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,256,0.1529
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,512,0.1557
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,1024,0.1640
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,2048,0.1766
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,4096,0.1850
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,1,0.1867
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,4,0.1866
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,8,0.1865
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,16,0.1862
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,32,0.1866
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,64,0.1881
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,128,0.1897
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,256,0.1923
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,512,0.1975
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,1024,0.2062
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,2048,0.2295
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,1,0.2874
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,4,0.2881
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,8,0.2889
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,16,0.2890
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,32,0.2891
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,64,0.2920
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,128,0.2935
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,256,0.2993
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,512,0.3093
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,1024,0.3260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,1,0.5002
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,4,0.5001
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,8,0.5012
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,16,0.5020
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,32,0.5042
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,64,0.5087
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,128,0.5132
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,256,0.5243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,512,0.5426
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,1,0.9190
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,4,0.9198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,8,0.9173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,16,0.9222
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,32,0.9253
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,64,0.9287
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,128,0.9375
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,256,0.9523
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,1,0.0927
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,4,0.0926
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,8,0.0927
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,16,0.0926
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,32,0.0926
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,64,0.0926
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,128,0.0928
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,256,0.0928
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,512,0.0930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,1024,0.0930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,2048,0.0966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,4096,0.1048
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,8192,0.1063
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,16384,0.1090
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,1,0.0962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,4,0.0960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,8,0.0960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,16,0.0960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,32,0.0960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,64,0.0963
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,128,0.0960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,256,0.0962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,512,0.0962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,1024,0.0965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,2048,0.1016
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,4096,0.1094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,8192,0.1104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,16384,0.1141
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,1,0.0972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,4,0.0977
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,8,0.0978
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,16,0.0970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,32,0.0971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,64,0.0972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,128,0.0975
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,256,0.0975
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,512,0.0978
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,1024,0.0994
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,2048,0.1077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,4096,0.1125
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,8192,0.1139
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,16384,0.1187
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,1,0.0957
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,4,0.0962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,8,0.0958
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,16,0.0964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,32,0.0956
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,64,0.0965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,128,0.0971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,256,0.0981
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,512,0.0989
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,1024,0.1011
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,2048,0.1119
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,4096,0.1169
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,8192,0.1192
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,16384,0.1238
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,1,0.1046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,4,0.1049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,8,0.1051
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,16,0.1053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,32,0.1054
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,64,0.1052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,128,0.1067
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,256,0.1076
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,512,0.1099
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,1024,0.1112
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,2048,0.1227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,4096,0.1272
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,8192,0.1302
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,16384,0.1374
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,1,0.1207
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,4,0.1214
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,8,0.1214
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,16,0.1209
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,32,0.1217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,64,0.1219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,128,0.1226
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,256,0.1220
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,512,0.1255
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,1024,0.1282
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,2048,0.1408
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,4096,0.1463
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,1,0.1447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,4,0.1444
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,8,0.1447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,16,0.1445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,32,0.1449
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,64,0.1458
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,128,0.1468
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,256,0.1484
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,512,0.1521
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,1024,0.1598
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,2048,0.1727
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,4096,0.1816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,1,0.1806
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,4,0.1801
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,8,0.1801
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,16,0.1807
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,32,0.1811
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,64,0.1829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,128,0.1840
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,256,0.1872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,512,0.1919
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,1024,0.2006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,2048,0.2242
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,1,0.2782
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,4,0.2785
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,8,0.2789
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,16,0.2792
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,32,0.2806
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,64,0.2825
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,128,0.2847
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,256,0.2894
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,512,0.2988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,1024,0.3163
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,1,0.4815
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,4,0.4820
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,8,0.4825
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,16,0.4839
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,32,0.4853
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,64,0.4907
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,128,0.4960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,256,0.5052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,512,0.5251
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,1,0.8832
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,4,0.8810
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,8,0.8814
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,16,0.8839
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,32,0.8850
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,64,0.8951
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,128,0.9045
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,256,0.9164
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,1,0.1423
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,4,0.1421
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,8,0.1421
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,16,0.1419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,32,0.1419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,64,0.1447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,128,0.1450
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,256,0.1447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,512,0.1451
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,1024,0.1460
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,2048,0.1505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,4096,0.1592
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,8192,0.1606
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,16384,0.1632
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,1,0.1425
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,4,0.1431
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,8,0.1423
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,16,0.1423
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,32,0.1426
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,64,0.1452
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,128,0.1456
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,256,0.1457
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,512,0.1470
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,1024,0.1493
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,2048,0.1548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,4096,0.1613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,8192,0.1628
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,16384,0.1657
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,1,0.1440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,4,0.1444
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,8,0.1440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,16,0.1440
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,32,0.1444
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,64,0.1479
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,128,0.1478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,256,0.1487
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,512,0.1501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,1024,0.1537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,2048,0.1629
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,4096,0.1662
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,8192,0.1680
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,16384,0.1717
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,1,0.1456
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,4,0.1453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,8,0.1456
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,16,0.1458
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,32,0.1460
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,64,0.1493
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,128,0.1510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,256,0.1517
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,512,0.1559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,1024,0.1575
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,2048,0.1659
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,4096,0.1741
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,8192,0.1758
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,16384,0.1809
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,1,0.1478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,4,0.1473
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,8,0.1484
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,16,0.1477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,32,0.1476
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,64,0.1530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,128,0.1549
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,256,0.1574
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,512,0.1593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,1024,0.1637
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,2048,0.1774
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,4096,0.1822
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,8192,0.1850
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,16384,0.1917
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,1,0.1508
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,4,0.1506
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,8,0.1514
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,16,0.1517
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,32,0.1512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,64,0.1609
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,128,0.1638
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,256,0.1656
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,512,0.1685
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,1024,0.1759
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,2048,0.1931
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,4096,0.1990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,1,0.1627
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,4,0.1620
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,8,0.1618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,16,0.1622
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,32,0.1629
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,64,0.1652
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,128,0.1772
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,256,0.1805
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,512,0.1867
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,1024,0.1971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,2048,0.2248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,4096,0.2331
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,1,0.1772
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,4,0.1771
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,8,0.1772
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,16,0.1771
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,32,0.1779
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,64,0.1806
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,128,0.1829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,256,0.1873
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,512,0.1991
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,1024,0.2195
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,2048,0.2635
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,1,0.2309
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,4,0.2310
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,8,0.2317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,16,0.2309
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,32,0.2323
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,64,0.2372
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,128,0.2434
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,256,0.2544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,512,0.2769
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,1024,0.3165
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,1,0.3823
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,4,0.3838
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,8,0.3847
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,16,0.3844
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,32,0.3863
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,64,0.3946
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,128,0.4054
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,256,0.4265
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,512,0.4699
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,1,0.7130
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,4,0.7141
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,8,0.7175
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,16,0.7171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,32,0.7172
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,64,0.7312
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,128,0.7508
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,256,0.7934
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,1,0.1068
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,4,0.1061
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,8,0.1067
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,16,0.1059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,32,0.1063
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,64,0.1082
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,128,0.1089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,256,0.1085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,512,0.1097
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,1024,0.1094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,2048,0.1149
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,4096,0.1244
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,8192,0.1263
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,16384,0.1264
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,1,0.1089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,4,0.1086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,8,0.1086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,16,0.1084
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,32,0.1091
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,64,0.1107
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,128,0.1110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,256,0.1105
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,512,0.1111
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,1024,0.1135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,2048,0.1227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,4096,0.1259
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,8192,0.1267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,16384,0.1277
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,1,0.1083
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,4,0.1082
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,8,0.1084
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,16,0.1085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,32,0.1085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,64,0.1107
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,128,0.1106
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,256,0.1115
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,512,0.1122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,1024,0.1137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,2048,0.1197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,4096,0.1281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,8192,0.1286
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,16384,0.1317
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,1,0.1085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,4,0.1086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,8,0.1084
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,16,0.1089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,32,0.1086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,64,0.1114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,128,0.1116
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,256,0.1127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,512,0.1146
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,1024,0.1162
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,2048,0.1281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,4096,0.1325
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,8192,0.1342
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,16384,0.1392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,1,0.1098
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,4,0.1102
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,8,0.1103
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,16,0.1098
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,32,0.1101
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,64,0.1140
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,128,0.1146
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,256,0.1170
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,512,0.1182
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,1024,0.1223
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,2048,0.1352
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,4096,0.1396
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,8192,0.1429
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,16384,0.1496
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,1,0.1125
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,4,0.1126
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,8,0.1127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,16,0.1125
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,32,0.1128
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,64,0.1174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,128,0.1191
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,256,0.1212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,512,0.1241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,1024,0.1304
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,2048,0.1472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,4096,0.1524
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,1,0.1162
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,4,0.1163
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,8,0.1167
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,16,0.1170
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,32,0.1171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,64,0.1197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,128,0.1268
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,256,0.1291
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,512,0.1341
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,1024,0.1445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,2048,0.1718
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,4096,0.1798
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,1,0.1260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,4,0.1268
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,8,0.1263
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,16,0.1267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,32,0.1274
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,64,0.1300
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,128,0.1322
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,256,0.1381
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,512,0.1490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,1024,0.1694
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,2048,0.2124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,1,0.1591
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,4,0.1593
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,8,0.1597
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,16,0.1603
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,32,0.1600
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,64,0.1649
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,128,0.1714
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,256,0.1823
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,512,0.2036
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,1024,0.2428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,1,0.2454
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,4,0.2467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,8,0.2470
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,16,0.2471
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,32,0.2488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,64,0.2585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,128,0.2688
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,256,0.2918
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,512,0.3335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,1,0.4342
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,4,0.4356
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,8,0.4356
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,16,0.4362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,32,0.4380
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,64,0.4545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,128,0.4755
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,256,0.5172
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,1,0.0893
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,4,0.0887
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,8,0.0893
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,16,0.0884
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,32,0.0889
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,64,0.0903
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,128,0.0909
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,256,0.0910
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,512,0.0911
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,1024,0.0922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,2048,0.0974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,4096,0.1039
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,8192,0.1057
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,16384,0.1083
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,1,0.0892
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,4,0.0889
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,8,0.0894
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,16,0.0891
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,32,0.0895
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,64,0.0911
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,128,0.0917
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,256,0.0913
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,512,0.0922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,1024,0.0934
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,2048,0.0978
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,4096,0.1057
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,8192,0.1065
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,16384,0.1097
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,1,0.0914
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,4,0.0910
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,8,0.0912
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,16,0.0913
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,32,0.0915
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,64,0.0931
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,128,0.0940
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,256,0.0936
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,512,0.0945
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,1024,0.0950
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,2048,0.1050
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,4096,0.1088
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,8192,0.1103
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,16384,0.1129
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,1,0.0905
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,4,0.0908
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,8,0.0905
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,16,0.0905
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,32,0.0906
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,64,0.0930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,128,0.0931
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,256,0.0943
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,512,0.0954
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,1024,0.0978
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,2048,0.1080
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,4096,0.1124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,8192,0.1142
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,16384,0.1188
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,1,0.0925
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,4,0.0924
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,8,0.0923
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,16,0.0922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,32,0.0926
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,64,0.0946
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,128,0.0958
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,256,0.0966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,512,0.0990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,1024,0.1018
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,2048,0.1166
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,4096,0.1204
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,8192,0.1241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,16384,0.1307
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,1,0.0946
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,4,0.0941
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,8,0.0940
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,16,0.0949
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,32,0.0946
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,64,0.0982
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,128,0.1001
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,256,0.1014
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,512,0.1047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,1024,0.1106
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,2048,0.1277
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,4096,0.1338
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,1,0.0972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,4,0.0976
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,8,0.0977
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,16,0.0976
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,32,0.0976
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,64,0.1004
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,128,0.1044
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,256,0.1069
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,512,0.1119
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,1024,0.1221
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,2048,0.1496
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,4096,0.1577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,1,0.1046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,4,0.1041
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,8,0.1048
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,16,0.1048
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,32,0.1053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,64,0.1080
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,128,0.1101
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,256,0.1155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,512,0.1260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,1024,0.1471
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,2048,0.1907
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,1,0.1290
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,4,0.1289
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,8,0.1289
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,16,0.1295
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,32,0.1292
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,64,0.1334
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,128,0.1400
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,256,0.1507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,512,0.1716
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,1024,0.2105
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,1,0.1856
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,4,0.1863
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,8,0.1865
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,16,0.1869
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,32,0.1879
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,64,0.1969
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,128,0.2080
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,256,0.2290
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,512,0.2702
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,1,0.3011
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,4,0.3029
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,8,0.3027
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,16,0.3044
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,32,0.3078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,64,0.3241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,128,0.3436
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,256,0.3848
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,1,0.0808
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,4,0.0802
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,8,0.0804
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,16,0.0807
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,32,0.0804
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,64,0.0826
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,128,0.0821
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,256,0.0824
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,512,0.0823
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,1024,0.0833
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,2048,0.0878
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,4096,0.0959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,8192,0.0963
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,16384,0.0992
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,1,0.0812
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,4,0.0813
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,8,0.0813
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,16,0.0807
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,32,0.0810
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,64,0.0824
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,128,0.0825
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,256,0.0820
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,512,0.0826
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,1024,0.0834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,2048,0.0888
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,4096,0.0968
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,8192,0.0976
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,16384,0.1005
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,1,0.0818
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,4,0.0822
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,8,0.0820
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,16,0.0819
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,32,0.0819
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,64,0.0834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,128,0.0835
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,256,0.0837
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,512,0.0843
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,1024,0.0857
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,2048,0.0921
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,4096,0.0988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,8192,0.1005
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,16384,0.1046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,1,0.0821
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,4,0.0818
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,8,0.0819
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,16,0.0823
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,32,0.0823
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,64,0.0838
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,128,0.0844
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,256,0.0848
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,512,0.0855
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,1024,0.0881
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,2048,0.0983
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,4096,0.1031
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,8192,0.1046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,16384,0.1093
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,1,0.0829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,4,0.0833
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,8,0.0832
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,16,0.0834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,32,0.0835
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,64,0.0853
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,128,0.0860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,256,0.0865
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,512,0.0887
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,1024,0.0917
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,2048,0.1058
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,4096,0.1099
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,8192,0.1133
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,16384,0.1199
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,1,0.0857
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,4,0.0857
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,8,0.0859
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,16,0.0860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,32,0.0858
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,64,0.0884
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,128,0.0898
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,256,0.0916
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,512,0.0947
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,1024,0.0999
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,2048,0.1180
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,4096,0.1236
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,1,0.0877
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,4,0.0876
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,8,0.0877
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,16,0.0877
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,32,0.0878
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,64,0.0908
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,128,0.0938
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,256,0.0968
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,512,0.1024
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,1024,0.1120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,2048,0.1395
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,4096,0.1472
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,1,0.0933
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,4,0.0934
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,8,0.0937
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,16,0.0938
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,32,0.0946
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,64,0.0975
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,128,0.1007
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,256,0.1058
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,512,0.1165
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,1024,0.1361
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,2048,0.1818
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,1,0.1120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,4,0.1121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,8,0.1123
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,16,0.1129
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,32,0.1131
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,64,0.1182
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,128,0.1236
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,256,0.1347
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,512,0.1543
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,1024,0.1943
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,1,0.1571
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,4,0.1577
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,8,0.1581
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,16,0.1578
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,32,0.1596
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,64,0.1697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,128,0.1805
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,256,0.2015
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,512,0.2407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,1,0.2423
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,4,0.2422
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,8,0.2430
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,16,0.2438
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,32,0.2469
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,64,0.2637
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,128,0.2843
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,256,0.3241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,1,0.1493
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,4,0.1493
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,8,0.1501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,16,0.1496
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,32,0.1498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,64,0.1500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,128,0.1503
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,256,0.1507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,512,0.1509
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,1024,0.1511
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,2048,0.1541
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,4096,0.1619
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,8192,0.1633
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,16384,0.1663
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,1,0.1534
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,4,0.1543
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,8,0.1534
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,16,0.1539
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,32,0.1532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,64,0.1546
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,128,0.1537
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,256,0.1541
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,512,0.1545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,1024,0.1548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,2048,0.1584
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,4096,0.1670
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,8192,0.1675
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,16384,0.1697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,1,0.1544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,4,0.1540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,8,0.1539
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,16,0.1544
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,32,0.1543
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,64,0.1551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,128,0.1553
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,256,0.1553
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,512,0.1559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,1024,0.1556
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,2048,0.1642
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,4096,0.1676
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,8192,0.1685
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,16384,0.1727
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,1,0.1594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,4,0.1594
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,8,0.1595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,16,0.1591
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,32,0.1592
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,64,0.1588
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,128,0.1602
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,256,0.1604
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,512,0.1597
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,1024,0.1613
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,2048,0.1652
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,4096,0.1734
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,8192,0.1756
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,16384,0.1799
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,1,0.1675
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,4,0.1679
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,8,0.1679
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,16,0.1672
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,32,0.1680
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,64,0.1683
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,128,0.1685
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,256,0.1689
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,512,0.1694
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,1024,0.1699
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,2048,0.1810
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,4096,0.1851
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,8192,0.1881
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,16384,0.1943
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,1,0.1845
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,4,0.1845
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,8,0.1841
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,16,0.1844
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,32,0.1847
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,64,0.1847
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,128,0.1847
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,256,0.1833
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,512,0.1860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,1024,0.1880
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,2048,0.2011
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,4096,0.2067
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,1,0.2138
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,4,0.2140
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,8,0.2131
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,16,0.2134
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,32,0.2143
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,64,0.2142
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,128,0.2148
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,256,0.2166
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,512,0.2190
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,1024,0.2263
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,2048,0.2399
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,4096,0.2480
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,1,0.2508
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,4,0.2510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,8,0.2510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,16,0.2509
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,32,0.2519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,64,0.2521
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,128,0.2529
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,256,0.2548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,512,0.2595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,1024,0.2687
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,2048,0.2909
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,1,0.3770
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,4,0.3769
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,8,0.3779
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,16,0.3773
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,32,0.3780
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,64,0.3796
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,128,0.3826
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,256,0.3874
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,512,0.3977
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,1024,0.4140
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,1,0.6721
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,4,0.6744
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,8,0.6748
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,16,0.6733
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,32,0.6750
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,64,0.6789
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,128,0.6824
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,256,0.6946
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,512,0.7127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,1,1.2908
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,4,1.2960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,8,1.2942
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,16,1.2912
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,32,1.2962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,64,1.3068
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,128,1.3117
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,256,1.3251
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,1,0.1161
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,4,0.1155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,8,0.1157
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,16,0.1160
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,32,0.1165
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,64,0.1166
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,128,0.1167
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,256,0.1171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,512,0.1168
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,1024,0.1172
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,2048,0.1217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,4096,0.1283
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,8192,0.1330
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,16384,0.1348
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,1,0.1241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,4,0.1234
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,8,0.1219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,16,0.1220
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,32,0.1222
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,64,0.1234
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,128,0.1224
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,256,0.1241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,512,0.1246
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,1024,0.1247
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,2048,0.1310
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,4096,0.1334
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,8192,0.1360
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,16384,0.1393
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,1,0.1221
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,4,0.1216
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,8,0.1218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,16,0.1217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,32,0.1218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,64,0.1221
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,128,0.1226
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,256,0.1228
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,512,0.1233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,1024,0.1234
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,2048,0.1278
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,4096,0.1351
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,8192,0.1373
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,16384,0.1408
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,1,0.1265
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,4,0.1265
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,8,0.1271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,16,0.1267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,32,0.1269
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,64,0.1276
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,128,0.1281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,256,0.1285
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,512,0.1284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,1024,0.1283
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,2048,0.1373
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,4096,0.1416
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,8192,0.1433
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,16384,0.1475
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,1,0.1355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,4,0.1353
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,8,0.1353
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,16,0.1355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,32,0.1357
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,64,0.1354
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,128,0.1360
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,256,0.1358
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,512,0.1366
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,1024,0.1370
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,2048,0.1478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,4096,0.1525
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,8192,0.1554
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,16384,0.1620
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,1,0.1494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,4,0.1493
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,8,0.1492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,16,0.1496
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,32,0.1501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,64,0.1494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,128,0.1500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,256,0.1500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,512,0.1522
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,1024,0.1536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,2048,0.1661
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,4096,0.1714
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,1,0.1737
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,4,0.1739
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,8,0.1738
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,16,0.1742
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,32,0.1745
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,64,0.1748
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,128,0.1750
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,256,0.1764
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,512,0.1791
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,1024,0.1876
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,2048,0.2011
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,4096,0.2082
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,1,0.2114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,4,0.2121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,8,0.2116
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,16,0.2120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,32,0.2128
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,64,0.2137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,128,0.2144
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,256,0.2161
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,512,0.2208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,1024,0.2291
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,2048,0.2517
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,1,0.3227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,4,0.3233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,8,0.3235
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,16,0.3238
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,32,0.3248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,64,0.3267
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,128,0.3280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,256,0.3335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,512,0.3432
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,1024,0.3599
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,1,0.5760
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,4,0.5758
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,8,0.5766
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,16,0.5763
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,32,0.5791
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,64,0.5820
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,128,0.5866
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,256,0.5965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,512,0.6146
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,1,1.0925
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,4,1.0920
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,8,1.0968
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,16,1.0977
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,32,1.0965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,64,1.1044
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,128,1.1143
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,256,1.1291
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,1,0.0983
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,4,0.0984
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,8,0.0982
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,16,0.0985
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,32,0.0980
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,64,0.0991
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,128,0.0990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,256,0.0995
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,512,0.0996
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,1024,0.1006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,2048,0.1036
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,4096,0.1103
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,8192,0.1128
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,16384,0.1155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,1,0.1037
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,4,0.1037
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,8,0.1039
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,16,0.1045
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,32,0.1040
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,64,0.1044
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,128,0.1047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,256,0.1049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,512,0.1046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,1024,0.1045
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,2048,0.1127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,4096,0.1163
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,8192,0.1166
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,16384,0.1202
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,1,0.1042
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,4,0.1039
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,8,0.1042
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,16,0.1040
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,32,0.1038
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,64,0.1042
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,128,0.1046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,256,0.1046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,512,0.1047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,1024,0.1056
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,2048,0.1137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,4096,0.1175
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,8192,0.1184
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,16384,0.1219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,1,0.1081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,4,0.1079
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,8,0.1086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,16,0.1085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,32,0.1088
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,64,0.1085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,128,0.1089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,256,0.1100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,512,0.1096
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,1024,0.1099
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,2048,0.1188
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,4096,0.1231
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,8192,0.1248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,16384,0.1295
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,1,0.1169
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,4,0.1170
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,8,0.1171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,16,0.1173
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,32,0.1175
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,64,0.1177
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,128,0.1180
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,256,0.1180
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,512,0.1186
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,1024,0.1191
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,2048,0.1299
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,4096,0.1338
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,8192,0.1374
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,16384,0.1448
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,1,0.1306
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,4,0.1308
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,8,0.1309
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,16,0.1310
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,32,0.1306
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,64,0.1308
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,128,0.1315
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,256,0.1304
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,512,0.1330
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,1024,0.1346
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,2048,0.1473
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,4096,0.1530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,1,0.1534
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,4,0.1529
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,8,0.1531
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,16,0.1533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,32,0.1539
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,64,0.1543
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,128,0.1550
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,256,0.1560
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,512,0.1587
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,1024,0.1667
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,2048,0.1795
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,4096,0.1876
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,1,0.1884
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,4,0.1886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,8,0.1889
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,16,0.1883
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,32,0.1886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,64,0.1887
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,128,0.1898
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,256,0.1924
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,512,0.1963
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,1024,0.2042
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,2048,0.2275
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,1,0.2891
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,4,0.2892
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,8,0.2894
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,16,0.2902
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,32,0.2907
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,64,0.2928
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,128,0.2945
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,256,0.2988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,512,0.3093
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,1024,0.3254
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,1,0.5031
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,4,0.5038
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,8,0.5048
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,16,0.5058
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,32,0.5081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,64,0.5114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,128,0.5163
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,256,0.5262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,512,0.5459
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,1,0.9401
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,4,0.9406
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,8,0.9442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,16,0.9450
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,32,0.9461
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,64,0.9529
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,128,0.9607
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,256,0.9789
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,1,0.0888
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,4,0.0892
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,8,0.0897
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,16,0.0890
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,32,0.0891
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,64,0.0903
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,128,0.0900
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,256,0.0898
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,512,0.0906
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,1024,0.0907
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,2048,0.0930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,4096,0.1009
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,8192,0.1026
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,16384,0.1033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,1,0.0940
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,4,0.0940
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,8,0.0944
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,16,0.0945
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,32,0.0941
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,64,0.0940
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,128,0.0945
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,256,0.0948
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,512,0.0948
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,1024,0.0950
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,2048,0.1006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,4096,0.1064
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,8192,0.1070
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,16384,0.1104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,1,0.0953
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,4,0.0952
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,8,0.0950
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,16,0.0952
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,32,0.0946
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,64,0.0952
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,128,0.0963
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,256,0.0959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,512,0.0961
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,1024,0.0965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,2048,0.1046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,4096,0.1085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,8192,0.1103
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,16384,0.1135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,1,0.0992
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,4,0.1000
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,8,0.0995
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,16,0.1001
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,32,0.0992
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,64,0.1008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,128,0.1012
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,256,0.1009
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,512,0.1011
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,1024,0.1018
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,2048,0.1104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,4096,0.1146
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,8192,0.1164
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,16384,0.1213
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,1,0.1077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,4,0.1077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,8,0.1074
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,16,0.1078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,32,0.1078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,64,0.1081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,128,0.1088
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,256,0.1086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,512,0.1100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,1024,0.1104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,2048,0.1207
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,4096,0.1256
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,8192,0.1284
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,16384,0.1344
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,1,0.1217
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,4,0.1219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,8,0.1221
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,16,0.1218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,32,0.1218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,64,0.1223
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,128,0.1228
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,256,0.1221
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,512,0.1250
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,1024,0.1269
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,2048,0.1388
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,4096,0.1445
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,1,0.1432
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,4,0.1431
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,8,0.1436
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,16,0.1437
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,32,0.1439
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,64,0.1451
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,128,0.1453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,256,0.1466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,512,0.1497
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,1024,0.1571
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,2048,0.1704
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,4096,0.1785
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,1,0.1761
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,4,0.1760
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,8,0.1755
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,16,0.1759
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,32,0.1760
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,64,0.1778
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,128,0.1786
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,256,0.1810
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,512,0.1853
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,1024,0.1936
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,2048,0.2166
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,1,0.2719
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,4,0.2727
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,8,0.2724
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,16,0.2727
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,32,0.2731
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,64,0.2762
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,128,0.2781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,256,0.2817
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,512,0.2908
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,1024,0.3072
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,1,0.4703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,4,0.4700
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,8,0.4713
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,16,0.4712
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,32,0.4738
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,64,0.4787
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,128,0.4834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,256,0.4933
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,512,0.5122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,1,0.8697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,4,0.8696
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,8,0.8705
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,16,0.8741
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,32,0.8741
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,64,0.8837
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,128,0.8931
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,256,0.9060
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,1,0.1383
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,4,0.1375
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,8,0.1388
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,16,0.1390
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,32,0.1391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,64,0.1420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,128,0.1420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,256,0.1420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,512,0.1418
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,1024,0.1421
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,2048,0.1483
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,4096,0.1585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,8192,0.1603
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,16384,0.1630
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,1,0.1394
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,4,0.1393
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,8,0.1398
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,16,0.1398
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,32,0.1399
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,64,0.1424
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,128,0.1428
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,256,0.1430
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,512,0.1434
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,1024,0.1447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,2048,0.1540
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,4096,0.1590
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,8192,0.1601
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,16384,0.1627
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,1,0.1390
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,4,0.1387
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,8,0.1386
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,16,0.1386
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,32,0.1388
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,64,0.1410
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,128,0.1424
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,256,0.1423
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,512,0.1435
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,1024,0.1455
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,2048,0.1551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,4096,0.1580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,8192,0.1568
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,16384,0.1631
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,1,0.1352
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,4,0.1346
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,8,0.1349
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,16,0.1353
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,32,0.1354
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,64,0.1381
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,128,0.1393
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,256,0.1392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,512,0.1435
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,1024,0.1450
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,2048,0.1606
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,4096,0.1627
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,8192,0.1657
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,16384,0.1693
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,1,0.1368
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,4,0.1337
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,8,0.1373
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,16,0.1340
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,32,0.1376
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,64,0.1414
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,128,0.1427
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,256,0.1421
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,512,0.1468
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,1024,0.1474
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,2048,0.1623
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,4096,0.1688
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,8192,0.1698
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,16384,0.1789
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,1,0.1305
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,4,0.1308
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,8,0.1306
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,16,0.1302
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,32,0.1306
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,64,0.1374
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,128,0.1404
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,256,0.1420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,512,0.1458
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,1024,0.1554
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,2048,0.1699
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,4096,0.1755
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,1,0.1293
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,4,0.1295
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,8,0.1291
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,16,0.1286
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,32,0.1293
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,64,0.1322
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,128,0.1450
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,256,0.1470
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,512,0.1533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,1024,0.1640
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,2048,0.1911
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,4096,0.1980
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,1,0.1530
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,4,0.1531
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,8,0.1528
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,16,0.1534
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,32,0.1531
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,64,0.1562
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,128,0.1587
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,256,0.1639
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,512,0.1753
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,1024,0.1960
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,2048,0.2411
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,1,0.1920
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,4,0.1916
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,8,0.1922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,16,0.1929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,32,0.1930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,64,0.1983
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,128,0.2052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,256,0.2166
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,512,0.2392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,1024,0.2796
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,1,0.2857
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,4,0.2865
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,8,0.2877
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,16,0.2879
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,32,0.2894
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,64,0.2979
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,128,0.3081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,256,0.3318
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,512,0.3736
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,1,0.4959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,4,0.4976
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,8,0.4996
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,16,0.5000
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,32,0.5030
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,64,0.5140
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,128,0.5341
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,256,0.5768
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,1,0.1114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,4,0.1110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,8,0.1114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,16,0.1110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,32,0.1113
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,64,0.1136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,128,0.1137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,256,0.1143
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,512,0.1144
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,1024,0.1137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,2048,0.1204
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,4096,0.1271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,8192,0.1264
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,16384,0.1307
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,1,0.1114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,4,0.1112
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,8,0.1109
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,16,0.1118
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,32,0.1114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,64,0.1136
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,128,0.1139
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,256,0.1137
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,512,0.1141
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,1024,0.1150
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,2048,0.1259
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,4096,0.1288
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,8192,0.1307
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,16384,0.1324
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,1,0.1053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,4,0.1053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,8,0.1051
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,16,0.1049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,32,0.1054
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,64,0.1075
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,128,0.1070
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,256,0.1080
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,512,0.1088
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,1024,0.1125
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,2048,0.1228
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,4096,0.1253
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,8192,0.1277
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,16384,0.1298
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,1,0.1047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,4,0.1049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,8,0.1046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,16,0.1053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,32,0.1049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,64,0.1076
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,128,0.1081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,256,0.1089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,512,0.1104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,1024,0.1124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,2048,0.1234
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,4096,0.1291
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,8192,0.1309
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,16384,0.1360
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,1,0.1054
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,4,0.1059
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,8,0.1053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,16,0.1064
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,32,0.1058
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,64,0.1086
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,128,0.1098
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,256,0.1107
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,512,0.1130
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,1024,0.1165
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,2048,0.1296
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,4096,0.1341
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,8192,0.1370
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,16384,0.1442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,1,0.1044
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,4,0.1041
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,8,0.1039
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,16,0.1043
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,32,0.1041
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,64,0.1078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,128,0.1095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,256,0.1120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,512,0.1152
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,1024,0.1208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,2048,0.1377
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,4096,0.1442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,1,0.1054
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,4,0.1051
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,8,0.1046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,16,0.1060
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,32,0.1058
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,64,0.1081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,128,0.1151
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,256,0.1171
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,512,0.1229
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,1024,0.1345
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,2048,0.1607
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,4096,0.1685
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,1,0.1207
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,4,0.1207
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,8,0.1208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,16,0.1207
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,32,0.1210
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,64,0.1237
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,128,0.1268
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,256,0.1321
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,512,0.1427
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,1024,0.1633
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,2048,0.2083
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,1,0.1479
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,4,0.1478
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,8,0.1481
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,16,0.1487
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,32,0.1488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,64,0.1525
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,128,0.1589
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,256,0.1707
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,512,0.1929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,1024,0.2326
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,1,0.2052
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,4,0.2057
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,8,0.2065
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,16,0.2078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,32,0.2092
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,64,0.2184
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,128,0.2292
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,256,0.2523
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,512,0.2954
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,1,0.3362
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,4,0.3381
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,8,0.3384
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,16,0.3399
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,32,0.3424
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,64,0.3580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,128,0.3792
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,256,0.4198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,1,0.0965
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,4,0.0966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,8,0.0973
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,16,0.0966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,32,0.0968
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,64,0.0990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,128,0.0993
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,256,0.0991
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,512,0.1006
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,1024,0.1003
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,2048,0.1068
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,4096,0.1159
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,8192,0.1146
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,16384,0.1150
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,1,0.0986
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,4,0.0988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,8,0.0985
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,16,0.0985
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,32,0.0990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,64,0.1003
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,128,0.1005
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,256,0.1007
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,512,0.1009
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,1024,0.1020
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,2048,0.1110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,4096,0.1149
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,8192,0.1161
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,16384,0.1183
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,1,0.0945
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,4,0.0945
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,8,0.0944
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,16,0.0941
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,32,0.0949
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,64,0.0967
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,128,0.0968
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,256,0.0972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,512,0.0980
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,1024,0.0988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,2048,0.1107
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,4096,0.1120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,8192,0.1135
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,16384,0.1164
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,1,0.0925
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,4,0.0924
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,8,0.0920
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,16,0.0924
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,32,0.0923
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,64,0.0942
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,128,0.0948
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,256,0.0950
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,512,0.0964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,1024,0.0981
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,2048,0.1094
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,4096,0.1127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,8192,0.1149
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,16384,0.1191
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,1,0.0922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,4,0.0920
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,8,0.0923
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,16,0.0922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,32,0.0925
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,64,0.0944
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,128,0.0952
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,256,0.0964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,512,0.0981
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,1024,0.1014
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,2048,0.1150
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,4096,0.1200
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,8192,0.1233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,16384,0.1297
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,1,0.0931
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,4,0.0927
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,8,0.0925
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,16,0.0925
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,32,0.0929
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,64,0.0959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,128,0.0974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,256,0.0992
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,512,0.1024
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,1024,0.1078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,2048,0.1249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,4096,0.1309
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,1,0.0931
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,4,0.0931
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,8,0.0930
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,16,0.0932
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,32,0.0935
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,64,0.0962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,128,0.1001
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,256,0.1024
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,512,0.1080
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,1024,0.1187
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,2048,0.1462
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,4096,0.1533
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,1,0.1035
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,4,0.1033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,8,0.1040
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,16,0.1041
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,32,0.1046
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,64,0.1076
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,128,0.1104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,256,0.1161
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,512,0.1260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,1024,0.1453
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,2048,0.1914
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,1,0.1253
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,4,0.1259
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,8,0.1258
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,16,0.1261
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,32,0.1260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,64,0.1314
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,128,0.1378
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,256,0.1484
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,512,0.1692
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,1024,0.2091
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,1,0.1697
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,4,0.1706
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,8,0.1703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,16,0.1708
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,32,0.1715
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,64,0.1810
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,128,0.1921
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,256,0.2142
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,512,0.2560
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,1,0.2585
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,4,0.2597
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,8,0.2611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,16,0.2632
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,32,0.2652
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,64,0.2831
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,128,0.3020
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,256,0.3437
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,1,0.0863
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,4,0.0859
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,8,0.0861
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,16,0.0861
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,32,0.0861
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,64,0.0873
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,128,0.0876
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,256,0.0881
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,512,0.0883
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,1024,0.0904
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,2048,0.0971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,4096,0.1036
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,8192,0.1053
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,16384,0.1081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,1,0.0872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,4,0.0873
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,8,0.0872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,16,0.0873
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,32,0.0874
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,64,0.0886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,128,0.0885
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,256,0.0890
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,512,0.0905
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,1024,0.0922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,2048,0.1023
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,4096,0.1055
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,8192,0.1078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,16384,0.1106
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,1,0.0781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,4,0.0782
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,8,0.0783
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,16,0.0785
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,32,0.0781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,64,0.0794
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,128,0.0799
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,256,0.0815
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,512,0.0828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,1024,0.0843
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,2048,0.0964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,4096,0.0987
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,8192,0.1008
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,16384,0.1061
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,1,0.0782
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,4,0.0776
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,8,0.0786
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,16,0.0781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,32,0.0781
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,64,0.0800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,128,0.0811
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,256,0.0831
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,512,0.0846
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,1024,0.0891
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,2048,0.1007
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,4096,0.1050
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,8192,0.1073
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,16384,0.1127
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,1,0.0788
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,4,0.0794
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,8,0.0794
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,16,0.0802
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,32,0.0801
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,64,0.0828
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,128,0.0846
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,256,0.0841
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,512,0.0886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,1024,0.0937
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,2048,0.1078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,4096,0.1120
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,8192,0.1152
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,16384,0.1225
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,1,0.0814
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,4,0.0822
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,8,0.0822
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,16,0.0821
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,32,0.0826
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,64,0.0846
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,128,0.0872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,256,0.0901
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,512,0.0948
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,1024,0.1004
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,2048,0.1176
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,4096,0.1238
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,1,0.0815
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,4,0.0815
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,8,0.0817
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,16,0.0816
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,32,0.0834
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,64,0.0857
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,128,0.0917
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,256,0.0964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,512,0.1017
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,1024,0.1121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,2048,0.1393
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,4096,0.1474
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,1,0.0938
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,4,0.0925
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,8,0.0936
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,16,0.0938
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,32,0.0949
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,64,0.0990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,128,0.1028
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,256,0.1087
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,512,0.1182
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,1024,0.1383
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,2048,0.1835
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,1,0.1129
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,4,0.1132
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,8,0.1140
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,16,0.1143
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,32,0.1153
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,64,0.1202
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,128,0.1260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,256,0.1372
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,512,0.1573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,1024,0.1966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,1,0.1529
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,4,0.1531
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,8,0.1531
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,16,0.1538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,32,0.1553
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,64,0.1642
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,128,0.1765
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,256,0.1972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,512,0.2379
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,1,0.2237
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,4,0.2243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,8,0.2253
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,16,0.2264
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,32,0.2292
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,64,0.2452
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,128,0.2662
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,256,0.3064
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,1,0.1494
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,4,0.1501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,8,0.1498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,16,0.1496
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,32,0.1502
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,64,0.1510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,128,0.1507
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,256,0.1512
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,512,0.1505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,1024,0.1510
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,2048,0.1555
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,4096,0.1663
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,8192,0.1748
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,16384,0.1641
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,1,0.1495
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,4,0.1496
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,8,0.1492
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,16,0.1497
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,32,0.1490
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,64,0.1493
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,128,0.1499
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,256,0.1505
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,512,0.1501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,1024,0.1501
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,2048,0.1545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,4096,0.1636
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,8192,0.1641
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,16384,0.1657
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,1,0.1464
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,4,0.1466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,8,0.1467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,16,0.1466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,32,0.1466
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,64,0.1477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,128,0.1477
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,256,0.1479
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,512,0.1481
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,1024,0.1480
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,2048,0.1565
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,4096,0.1595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,8192,0.1582
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,16384,0.1645
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,1,0.1455
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,4,0.1457
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,8,0.1455
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,16,0.1459
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,32,0.1454
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,64,0.1461
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,128,0.1467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,256,0.1475
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,512,0.1476
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,1024,0.1487
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,2048,0.1568
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,4096,0.1595
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,8192,0.1633
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,16384,0.1698
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,1,0.1523
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,4,0.1539
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,8,0.1526
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,16,0.1538
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,32,0.1527
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,64,0.1548
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,128,0.1531
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,256,0.1557
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,512,0.1545
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,1024,0.1559
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,2048,0.1618
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,4096,0.1709
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,8192,0.1757
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,16384,0.1800
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,1,0.1629
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,4,0.1628
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,8,0.1629
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,16,0.1634
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,32,0.1630
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,64,0.1635
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,128,0.1641
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,256,0.1627
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,512,0.1651
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,1024,0.1678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,2048,0.1801
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,4096,0.1859
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,1,0.1785
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,4,0.1794
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,8,0.1793
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,16,0.1790
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,32,0.1798
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,64,0.1798
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,128,0.1805
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,256,0.1818
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,512,0.1843
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,1024,0.1935
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,2048,0.2051
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,4096,0.2133
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,1,0.2256
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,4,0.2255
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,8,0.2259
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,16,0.2258
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,32,0.2261
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,64,0.2270
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,128,0.2278
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,256,0.2298
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,512,0.2349
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,1024,0.2442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,2048,0.2678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,1,0.3368
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,4,0.3363
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,8,0.3366
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,16,0.3370
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,32,0.3386
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,64,0.3407
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,128,0.3425
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,256,0.3470
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,512,0.3580
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,1024,0.3758
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,1,0.5749
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,4,0.5743
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,8,0.5749
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,16,0.5766
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,32,0.5792
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,64,0.5829
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,128,0.5895
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,256,0.5974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,512,0.6172
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,1,1.0764
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,4,1.0783
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,8,1.0783
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,16,1.0796
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,32,1.0830
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,64,1.0882
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,128,1.0978
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,256,1.1122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,1,0.1212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,4,0.1215
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,8,0.1214
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,16,0.1216
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,32,0.1210
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,64,0.1216
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,128,0.1215
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,256,0.1228
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,512,0.1222
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,1024,0.1219
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,2048,0.1260
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,4096,0.1323
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,8192,0.1335
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,16384,0.1356
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,1,0.1255
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,4,0.1249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,8,0.1248
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,16,0.1251
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,32,0.1249
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,64,0.1251
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,128,0.1251
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,256,0.1258
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,512,0.1257
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,1024,0.1257
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,2048,0.1327
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,4096,0.1371
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,8192,0.1391
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,16384,0.1404
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,1,0.1198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,4,0.1198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,8,0.1199
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,16,0.1200
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,32,0.1197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,64,0.1204
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,128,0.1206
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,256,0.1212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,512,0.1208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,1024,0.1208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,2048,0.1300
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,4096,0.1320
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,8192,0.1340
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,16384,0.1375
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,1,0.1194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,4,0.1197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,8,0.1197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,16,0.1195
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,32,0.1198
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,64,0.1197
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,128,0.1204
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,256,0.1212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,512,0.1212
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,1024,0.1222
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,2048,0.1311
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,4096,0.1353
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,8192,0.1372
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,16384,0.1419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,1,0.1258
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,4,0.1255
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,8,0.1262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,16,0.1259
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,32,0.1261
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,64,0.1264
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,128,0.1271
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,256,0.1266
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,512,0.1280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,1024,0.1287
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,2048,0.1390
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,4096,0.1444
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,8192,0.1468
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,16384,0.1536
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,1,0.1386
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,4,0.1389
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,8,0.1387
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,16,0.1387
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,32,0.1389
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,64,0.1392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,128,0.1398
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,256,0.1383
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,512,0.1412
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,1024,0.1430
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,2048,0.1560
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,4096,0.1611
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,1,0.1619
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,4,0.1614
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,8,0.1619
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,16,0.1623
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,32,0.1628
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,64,0.1632
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,128,0.1639
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,256,0.1648
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,512,0.1678
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,1024,0.1758
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,2048,0.1886
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,4096,0.1964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,1,0.2066
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,4,0.2062
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,8,0.2069
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,16,0.2069
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,32,0.2073
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,64,0.2077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,128,0.2082
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,256,0.2098
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,512,0.2139
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,1024,0.2232
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,2048,0.2467
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,1,0.3106
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,4,0.3100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,8,0.3102
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,16,0.3113
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,32,0.3121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,64,0.3143
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,128,0.3153
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,256,0.3201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,512,0.3305
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,1024,0.3486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,1,0.5355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,4,0.5351
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,8,0.5354
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,16,0.5359
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,32,0.5372
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,64,0.5411
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,128,0.5456
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,256,0.5570
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,512,0.5753
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,1,0.9955
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,4,0.9931
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,8,0.9964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,16,0.9966
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,32,0.9983
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,64,1.0047
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,128,1.0132
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,256,1.0301
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,1,0.1064
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,4,0.1068
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,8,0.1062
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,16,0.1070
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,32,0.1064
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,64,0.1074
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,128,0.1072
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,256,0.1077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,512,0.1072
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,1024,0.1078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,2048,0.1107
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,4096,0.1184
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,8192,0.1185
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,16384,0.1224
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,1,0.1114
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,4,0.1122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,8,0.1117
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,16,0.1112
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,32,0.1110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,64,0.1118
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,128,0.1118
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,256,0.1121
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,512,0.1122
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,1024,0.1125
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,2048,0.1167
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,4096,0.1238
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,8192,0.1251
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,16384,0.1279
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,1,0.1074
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,4,0.1072
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,8,0.1074
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,16,0.1070
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,32,0.1072
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,64,0.1073
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,128,0.1082
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,256,0.1082
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,512,0.1085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,1024,0.1085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,2048,0.1189
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,4096,0.1201
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,8192,0.1218
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,16384,0.1266
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,1,0.1081
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,4,0.1078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,8,0.1077
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,16,0.1076
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,32,0.1078
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,64,0.1085
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,128,0.1087
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,256,0.1093
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,512,0.1095
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,1024,0.1099
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,2048,0.1194
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,4096,0.1230
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,8192,0.1246
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,16384,0.1297
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,1,0.1152
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,4,0.1155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,8,0.1149
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,16,0.1153
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,32,0.1152
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,64,0.1155
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,128,0.1161
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,256,0.1165
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,512,0.1172
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,1024,0.1174
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,2048,0.1282
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,4096,0.1325
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,8192,0.1355
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,16384,0.1426
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,1,0.1275
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,4,0.1279
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,8,0.1279
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,16,0.1280
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,32,0.1282
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,64,0.1281
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,128,0.1288
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,256,0.1272
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,512,0.1302
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,1024,0.1322
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,2048,0.1447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,4096,0.1500
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,1,0.1485
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,4,0.1488
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,8,0.1483
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,16,0.1486
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,32,0.1493
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,64,0.1498
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,128,0.1508
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,256,0.1519
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,512,0.1551
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,1024,0.1622
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,2048,0.1758
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,4096,0.1835
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,1,0.1872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,4,0.1871
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,8,0.1872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,16,0.1872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,32,0.1879
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,64,0.1889
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,128,0.1897
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,256,0.1922
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,512,0.1958
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,1024,0.2043
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,2048,0.2274
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,1,0.2860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,4,0.2860
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,8,0.2869
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,16,0.2875
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,32,0.2880
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,64,0.2899
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,128,0.2913
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,256,0.2970
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,512,0.3056
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,1024,0.3233
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,1,0.4884
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,4,0.4883
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,8,0.4892
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,16,0.4900
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,32,0.4921
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,64,0.4969
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,128,0.4998
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,256,0.5104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,512,0.5297
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,1,0.9062
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,4,0.9012
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,8,0.9066
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,16,0.9023
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,32,0.9034
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,64,0.9102
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,128,0.9184
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,256,0.9418
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,1,0.0974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,4,0.0971
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,8,0.0974
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,16,0.0975
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,32,0.0975
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,64,0.0972
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,128,0.0977
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,256,0.0977
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,512,0.0977
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,1024,0.0986
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,2048,0.1029
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,4096,0.1100
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,8192,0.1109
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,16384,0.1139
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,1,0.1028
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,4,0.1035
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,8,0.1036
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,16,0.1036
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,32,0.1035
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,64,0.1036
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,128,0.1033
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,256,0.1039
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,512,0.1045
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,1024,0.1049
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,2048,0.1124
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,4096,0.1163
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,8192,0.1170
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,16384,0.1220
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,1,0.0955
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,4,0.0951
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,8,0.0952
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,16,0.0946
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,32,0.0945
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,64,0.0951
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,128,0.0951
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,256,0.0959
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,512,0.0962
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,1024,0.0964
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,2048,0.1034
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,4096,0.1104
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,8192,0.1123
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,16384,0.1164
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,1,0.0988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,4,0.0988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,8,0.0988
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,16,0.0992
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,32,0.0990
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,64,0.0996
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,128,0.1002
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,256,0.1014
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,512,0.1022
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,1024,0.1029
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,2048,0.1126
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,4096,0.1167
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,8192,0.1195
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,16384,0.1243
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,1,0.1066
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,4,0.1069
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,8,0.1071
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,16,0.1065
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,32,0.1074
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,64,0.1075
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,128,0.1089
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,256,0.1091
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,512,0.1103
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,1024,0.1110
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,2048,0.1227
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,4096,0.1262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,8192,0.1298
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,16384,0.1370
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,1,0.1205
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,4,0.1205
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,8,0.1204
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,16,0.1203
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,32,0.1205
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,64,0.1211
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,128,0.1220
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,256,0.1208
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,512,0.1241
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,1024,0.1262
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,2048,0.1392
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,4096,0.1447
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,1,0.1418
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,4,0.1416
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,8,0.1419
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,16,0.1420
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,32,0.1426
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,64,0.1432
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,128,0.1442
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,256,0.1462
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,512,0.1491
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,1024,0.1567
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,2048,0.1695
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,4096,0.1785
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,1,0.1774
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,4,0.1772
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,8,0.1775
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,16,0.1774
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,32,0.1784
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,64,0.1796
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,128,0.1808
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,256,0.1832
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,512,0.1872
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,1024,0.1949
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,2048,0.2191
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,1,0.2732
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,4,0.2736
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,8,0.2728
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,16,0.2734
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,32,0.2745
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,64,0.2763
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,128,0.2787
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,256,0.2832
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,512,0.2928
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,1024,0.3105
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,1,0.4657
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,4,0.4660
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,8,0.4669
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,16,0.4680
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,32,0.4703
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,64,0.4743
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,128,0.4786
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,256,0.4889
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,512,0.5073
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,1,0.8518
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,4,0.8532
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,8,0.8553
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,16,0.8546
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,32,0.8573
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,64,0.8670
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,128,0.8731
SGLang,0.0.0.dev1+ga4cf2ea12,NVIDIA H200,dsa_generation_module,dsa_nsa,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,256,0.8908
