framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,2,0.1729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,4,0.1693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,8,0.1675
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,512,0.1795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,2048,0.3033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,4096,0.2060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,8192,0.1965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,1024,0.1757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,16384,0.2136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,32768,0.2305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,65536,0.2893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,131072,0.3994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,32,0.1729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,16,0.1689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,128,0.1676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,4,0.1862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,8,0.1863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,128,0.1895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,32,0.1761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,64,0.1704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,256,0.1922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,512,0.1939
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,1024,0.1818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,64,0.1743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,256,0.1734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,2,0.1843
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,4096,0.2133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,8192,0.2115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,16384,0.2147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,2048,0.3143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,32768,0.2486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,16,0.1826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,65536,0.3017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,8,0.1767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,64,0.1834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,128,0.1822
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,32,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,16,0.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,4,0.1775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,2,0.1841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,2048,0.3325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,4096,0.1971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,256,0.1839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,1024,0.1928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,16384,0.2257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,512,0.1967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,65536,0.3058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,32768,0.2527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,131072,0.4077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,8,0.1520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,4,0.1506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,32,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,64,0.1456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,128,0.1586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,256,0.1646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,512,0.1671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,1024,0.1670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,4096,0.1838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,8192,0.1871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,16384,0.1975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,32768,0.2224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,65536,0.2871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,131072,0.4001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,2,0.1561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,4,0.1584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,8,0.1534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,16,0.1557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,131072,0.3995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,32,0.1572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,64,0.1530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,128,0.1641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,512,0.1698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,8192,0.2088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,16,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,2048,0.2982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,2048,0.3002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,4096,0.1891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,1024,0.1650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,2,0.1487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,65536,0.3060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,16384,0.2023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,256,0.1603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,4,0.1491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,8,0.1481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,2,0.1477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,16,0.1477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,8192,0.1911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,131072,0.4353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,32768,0.2359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,64,0.1484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,128,0.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,256,0.1551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,512,0.1556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,32,0.1477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,1024,0.1617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,2048,0.3001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,8192,0.1937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,16384,0.2087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,32768,0.2431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,131072,0.4750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,4096,0.1893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,2,0.1560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,65536,0.3259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,8,0.1545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,16,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,1024,0.1736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,4,0.1561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,2048,0.3227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,64,0.1540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,8192,0.2198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,32,0.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,4096,0.2081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,128,0.1618
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,32768,0.2841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,65536,0.3896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,131072,0.5772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,16384,0.2400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,512,0.1660
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,256,0.1613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,2,0.1798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,4,0.1842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,64,0.1830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,256,0.1898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,128,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,8,0.1809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,1024,0.2253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,512,0.2014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,4096,0.2725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,8192,0.2911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,16384,0.3171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,65536,0.5303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,131072,0.8013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,2,0.2475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,4,0.2422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,8,0.2444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,16,0.2429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,32,0.2467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,64,0.2422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,128,0.2550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,256,0.2617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,512,0.2818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,1024,0.3241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,2048,0.6254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,32,0.1810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,2048,0.3838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,32768,0.3895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,4096,0.4125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,16,0.1824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,131072,1.4135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,65536,0.8542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,4,0.3481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,8,0.3513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,64,0.3542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,128,0.3755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,256,0.3850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,32,0.3574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,1024,0.4963
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,2048,1.0904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,4096,0.6651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,8192,0.7338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,16384,0.8318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,65536,1.5601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,32768,0.6188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,8192,0.4409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,2,0.3568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,2,0.6317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,4,0.6302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,32,0.6315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,8,0.6275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,64,0.6334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,16,0.3568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,128,0.6626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,256,0.6955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,512,0.7639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,1024,0.9124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,2048,1.9700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,4096,1.2309
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,8192,1.3364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,16384,1.5479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,512,0.4244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,2,0.1384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,32768,1.9788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,32768,1.0685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,16384,0.5028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,16,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,32,0.1384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,4,0.1348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,8,0.3487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,128,0.1449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,256,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,64,0.1343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,512,0.1429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,4096,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,16384,0.1753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,1024,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,2048,0.2751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,8192,0.1643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,32768,0.2032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,32,0.1418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,16,0.6503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,64,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,128,0.1519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,256,0.1436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,512,0.1583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,1024,0.1487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,131072,0.3581
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,4,0.1487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,8,0.1439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,65536,0.2630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,16,0.1428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,4096,0.1772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,8192,0.1826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,16384,0.1846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,131072,0.3695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,65536,0.2776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,32768,0.2208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,4,0.1468
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,8,0.1469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,32,0.1522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,16,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,128,0.1481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,2,0.1418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,2,0.1453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,2048,0.2922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,64,0.1487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,512,0.1596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,1024,0.1547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,2048,0.2893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,32768,0.2208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,8192,0.1788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,4096,0.1739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,131072,0.3812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,2,0.1429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,4,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,256,0.1521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,8,0.1417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,16384,0.1975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,65536,0.2844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,32,0.1424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,128,0.1467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,512,0.1583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,64,0.1417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,256,0.1521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,16,0.1449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,2048,0.2931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,16384,0.1895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,1024,0.1620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,32768,0.2217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,65536,0.2883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,2,0.1363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,4,0.1312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,131072,0.3983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,4096,0.1815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,16,0.1342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,128,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,32,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,8192,0.1807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,8,0.1372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,256,0.1452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,512,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,2048,0.2877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,1024,0.1508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,4096,0.1784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,8192,0.1847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,64,0.1342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,16384,0.1991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,32768,0.2221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,4,0.1227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,2,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,16,0.1247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,65536,0.2939
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,8,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,64,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,131072,0.4231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,256,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,512,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,4096,0.1856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,8192,0.1911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,2048,0.3000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,128,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,65536,0.3256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,131072,0.4733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,32768,0.2449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,8,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,4,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,2,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,16,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,32,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,32,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,128,0.1403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,64,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,512,0.1593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,256,0.1472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,2048,0.3629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,1024,0.1884
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,8192,0.2603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,16384,0.2804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,4096,0.2526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,65536,0.4309
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,32768,0.3289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,131072,0.6203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,2,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,8,0.1571
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,16,0.1573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,32,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,64,0.1583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,256,0.1810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,512,0.2071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,1024,0.2546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,4096,0.3556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,8192,0.3813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,16384,0.4104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,1024,0.1471
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,131072,0.8905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,2,0.2221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,16384,0.2076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,4,0.2167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,8,0.2197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,16,0.2184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,32,0.2162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,64,0.2178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,128,0.2405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,256,0.2580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,512,0.3078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,1024,0.3977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,2048,0.7986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,8192,0.6156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,4,0.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,16384,0.6735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,32768,0.7931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,128,0.1672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,65536,1.0179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,131072,1.5762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,2,0.3374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,4,0.3262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,2048,0.4668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,8,0.3273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,32,0.3370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,64,0.3255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,16,0.3279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,128,0.3690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,2048,1.4845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,256,0.4231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,512,0.5157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,1024,0.6814
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,32768,0.4734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,4096,1.0647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,16384,1.2269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,8,0.5685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,32768,1.4705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,2,0.5681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,8192,1.1192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,65536,1.9729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,16,0.5696
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,65536,0.6189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,4096,0.5886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,32,0.5693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,64,0.5695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,4,0.5490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,256,0.7432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,512,0.9035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,128,0.6375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,16384,2.3102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,8192,2.0906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,2048,2.7358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,2,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,4,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,8,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,64,0.1149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,16,0.1739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,1024,1.2746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,128,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,32768,2.7390
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,256,0.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,4096,2.0142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,512,0.1181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,8192,0.1413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,32,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,16384,0.1477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,32768,0.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,1024,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,4096,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,2,0.1202
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,131072,0.3332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,65536,0.2384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,2048,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,32,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,8,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,16,0.1287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,64,0.1280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,256,0.1271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,1024,0.1325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,128,0.1234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,2048,0.2585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,512,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,16384,0.1633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,32768,0.1911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,4096,0.1523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,4,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,4,0.1252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,2,0.1249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,131072,0.3512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,65536,0.2482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,8,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,64,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,16,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,8192,0.1550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,512,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,256,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,2048,0.2646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,1024,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,8192,0.1596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,16384,0.1725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,4096,0.1568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,32,0.1229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,2,0.1262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,128,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,131072,0.3635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,32768,0.1911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,16,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,32,0.1260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,64,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,4,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,256,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,128,0.1204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,65536,0.2585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,8,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,2048,0.2715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,16384,0.1709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,512,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,8192,0.1587
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,1024,0.1321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,32768,0.2028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,4,0.1234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,4096,0.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,131072,0.3827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,65536,0.2662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,8,0.1270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,32,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,16,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,128,0.1300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,2,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,2048,0.2779
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,512,0.1418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,4096,0.1597
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,256,0.1329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,64,0.1240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,32768,0.2126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,1024,0.1395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,65536,0.2847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,16384,0.1814
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,8192,0.1744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,8,0.1068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,131072,0.4134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,64,0.1064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,32,0.1076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,4,0.1080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,128,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,1024,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,256,0.1225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,2048,0.2732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,512,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,8192,0.1689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,16,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,32768,0.2174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,65536,0.3016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,16384,0.1804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,2,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,2,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,8,0.1144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,4096,0.1610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,4,0.1108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,64,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,128,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,32,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,16,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,1024,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,2048,0.2987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,256,0.1250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,131072,0.4524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,512,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,65536,0.3653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,8192,0.1978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,4096,0.1886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,32768,0.2649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,131072,0.5553
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,16384,0.2189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,16,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,32,0.1352
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,8,0.1343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,4,0.1301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,256,0.1516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,64,0.1356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,512,0.1665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,2,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,128,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,2048,0.3751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,8192,0.2840
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,1024,0.1959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,131072,0.7924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,65536,0.5209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,4096,0.2629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,2,0.1835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,4,0.1842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,32768,0.3783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,8,0.1857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,32,0.1848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,16,0.1798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,16384,0.3143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,512,0.2349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,256,0.2125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,2048,0.6239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,64,0.1828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,16384,0.4945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,8192,0.4319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,1024,0.2900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,32768,0.6164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,128,0.1996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,4,0.2732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,2,0.2730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,131072,1.3986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,4096,0.4057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,65536,0.8466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,8,0.2825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,64,0.2721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,32,0.2817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,256,0.3199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,512,0.3754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,1024,0.4770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,16,0.2815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,2048,1.1191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,16384,0.8659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,8192,0.7517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,128,0.2950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,2,0.4667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,4096,0.7047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,16,0.4478
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,32768,1.0941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,65536,1.6033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,32,0.4498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,4,0.4681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,8,0.4479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,128,0.5110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,256,0.5449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,64,0.4675
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,512,0.6526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,4096,1.2852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,2048,2.0125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,8192,1.3778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,2,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,4,0.1086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,32768,2.0228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,16384,1.5999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,16,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,8,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,64,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,1024,0.8329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,32,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,4096,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,1024,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,256,0.1056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,512,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,128,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,2048,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,8192,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,131072,0.3190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,65536,0.2178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,2,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,8,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,16,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,32,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,32768,0.1630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,64,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,256,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,512,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,2048,0.2444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,1024,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,4096,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,16384,0.1347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,128,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,8192,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,32768,0.1704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,16384,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,2,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,131072,0.3401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,4,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,4,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,32,0.1150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,65536,0.2279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,128,0.1165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,64,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,8,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,16,0.1140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,1024,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,4096,0.1332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,16384,0.1540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,8192,0.1367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,2048,0.2488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,32768,0.1757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,131072,0.3591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,2,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,65536,0.2442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,4,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,16,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,64,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,8,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,512,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,256,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,512,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,1024,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,2048,0.2466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,256,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,4096,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,8192,0.1398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,32,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,65536,0.2505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,128,0.1122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,32768,0.1830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,131072,0.3701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,8,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,4,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,16,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,32,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,16384,0.1536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,512,0.1146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,256,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,128,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,1024,0.1183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,4096,0.1424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,8192,0.1515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,32768,0.2037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,65536,0.2710
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,131072,0.4004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,2,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,64,0.1117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,2,0.1139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,2048,0.2622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,16,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,32,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,64,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,128,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,8,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,16384,0.1736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,4,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,2048,0.2623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,512,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,8192,0.1552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,32768,0.2072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,4096,0.1491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,256,0.1065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,131072,0.4369
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,4,0.1026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,8,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,16384,0.1723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,65536,0.2912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,64,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,32,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,256,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,2,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,16,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,1024,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,512,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,128,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,2048,0.2816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,32768,0.2477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,16384,0.1976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,65536,0.3474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,131072,0.5363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,2,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,8,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,1024,0.1359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,4096,0.1716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,32,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,4,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,8192,0.1785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,64,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,16,0.1148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,128,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,256,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,4096,0.2152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,2048,0.3208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,16384,0.2595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,1024,0.1630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,32768,0.3257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,512,0.1499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,65536,0.4739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,2,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,8,0.1587
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,4,0.1586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,32,0.1573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,8192,0.2306
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,256,0.1858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,131072,0.7419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,128,0.1740
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,64,0.1666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,512,0.2042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,4096,0.3229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,16,0.1632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,16384,0.4049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,32768,0.5282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,1024,0.2331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,2048,0.5370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,2,0.2518
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,4,0.2383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,16,0.2403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,131072,1.3116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,32,0.2508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,128,0.2586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,8,0.2505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,65536,0.7560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,64,0.2548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,8192,0.3604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,1024,0.3695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,512,0.3139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,4096,0.5574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,8192,0.6096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,2048,0.9730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,256,0.2758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,65536,1.4604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,16384,0.7079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,2,0.4011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,16,0.4131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,8,0.4146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,32768,0.9526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,4,0.4139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,64,0.3941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,128,0.4244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,32,0.3948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,256,0.4607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,512,0.5385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,2048,1.6792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,16384,1.2626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,8192,1.0494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,1024,0.6540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,4,0.1003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,32768,1.7066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,2,0.0976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,32,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,64,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,4096,0.9649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,8,0.0962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,16,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,1024,0.0997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,256,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,2048,0.1064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,8192,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,4096,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,16384,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,128,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,512,0.0979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,2,0.1082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,4,0.1087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,65536,0.2149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,32,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,16,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,32768,0.1521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,131072,0.3183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,64,0.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,8,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,128,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,256,0.1147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,4096,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,1024,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,32768,0.1651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,65536,0.2303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,2048,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,512,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,2,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,8192,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,16384,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,32,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,16,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,8,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,131072,0.3266
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,128,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,4,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,1024,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,2048,0.2493
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,8192,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,16384,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,256,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,32768,0.1702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,4096,0.1261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,64,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,512,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,4,0.1084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,131072,0.3450
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,2,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,16,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,65536,0.2404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,8,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,512,0.1065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,256,0.1068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,1024,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,32,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,4096,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,2048,0.2453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,64,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,16384,0.1465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,65536,0.2499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,8192,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,131072,0.3632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,2,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,8,0.1081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,16,0.1143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,32,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,64,0.1089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,4,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,256,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,128,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,512,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,32768,0.1766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,4096,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,8192,0.1459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,32768,0.1976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,1024,0.1164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,128,0.1141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,2048,0.2531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,2,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,131072,0.3999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,8,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,16384,0.1671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,32,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,64,0.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,4,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,65536,0.2658
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,16,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,256,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,128,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,2048,0.2562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,8192,0.1502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,1024,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,32768,0.2021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,512,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,16384,0.1674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,2,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,8,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,16,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,32,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,64,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,4096,0.1447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,128,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,4,0.1004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,1024,0.1296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,65536,0.2855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,131072,0.4336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,512,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,16384,0.1935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,4096,0.1651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,256,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,131072,0.5343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,65536,0.3447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,2048,0.2788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,8,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,8192,0.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,2,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,64,0.1161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,32768,0.2418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,256,0.1249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,128,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,512,0.1430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,1024,0.1610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,4096,0.2071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,8192,0.2282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,2048,0.3190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,32768,0.3269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,65536,0.4646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,32,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,4,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,131072,0.7393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,4,0.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,16384,0.2569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,16,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,8,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,16,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,32,0.1546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,64,0.1614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,256,0.1829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,2,0.1484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,512,0.1951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,4096,0.3201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,1024,0.2261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,16384,0.4046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,32768,0.5173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,128,0.1712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,2,0.2295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,8192,0.3501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,2048,0.5288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,131072,1.3061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,4,0.2398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,16,0.2414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,32,0.2324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,8,0.2321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,256,0.2808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,512,0.3117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,1024,0.3698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,2048,0.9485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,64,0.2405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,4096,0.5382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,8192,0.5981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,32768,0.9413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,16384,0.7093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,128,0.2642
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,4,0.3776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,8,0.3992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,65536,1.4548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,65536,0.7443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,2,0.3871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,128,0.4069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,16,0.3988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,256,0.4562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,64,0.4014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,2048,1.6383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,32,0.3986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,8192,1.0482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,1024,0.6216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,512,0.4987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,4096,0.9250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,32768,1.6991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,2,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,8,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,32,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,16,0.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,16384,1.2602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,4,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,128,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,512,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,2048,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,256,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,16384,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,64,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,8192,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,1024,0.0953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,65536,0.2111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,32768,0.1525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,4096,0.1088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,8,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,2,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,131072,0.3081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,32,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,4,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,64,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,512,0.1080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,2048,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,4096,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,1024,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,8192,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,128,0.1038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,65536,0.2192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,32768,0.1627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,2,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,256,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,4,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,131072,0.3257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,32,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,16,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,128,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,16,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,16384,0.1379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,512,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,256,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,4096,0.1275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,1024,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,8,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,32768,0.1690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,16384,0.1398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,8192,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,64,0.1043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,4,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,8,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,131072,0.3421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,2048,0.2386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,64,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,16,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,128,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,32,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,2,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,256,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,4096,0.1231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,512,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,8192,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,16384,0.1371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,65536,0.2446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,1024,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,32768,0.1686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,2048,0.2365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,4,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,16,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,2,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,8,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,32,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,131072,0.3595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,256,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,64,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,1024,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,2048,0.2443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,4096,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,512,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,65536,0.2303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,128,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,32768,0.1928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,131072,0.3943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,4,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,16384,0.1522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,65536,0.2610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,2,0.0955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,16,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,64,0.0937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,8,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,32,0.0944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,1024,0.1067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,128,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,2048,0.2512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,8192,0.1478
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,16384,0.1625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,4096,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,65536,0.2853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,512,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,8192,0.1379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,32768,0.1972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,256,0.0961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,8,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,16,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,2,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,64,0.0959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,32,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,512,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,4,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,131072,0.4294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,1024,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,128,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,2048,0.2716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,4096,0.1615
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,16384,0.1926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,8192,0.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,256,0.1047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,32768,0.2396
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,131072,0.5301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,4,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,65536,0.3409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,32,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,64,0.1117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,128,0.1172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,256,0.1225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,512,0.1418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,2,0.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,4096,0.2110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,8192,0.2237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,8,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,32768,0.3212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,2048,0.3186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,16384,0.2556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,1024,0.1577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,16,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,4,0.1446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,16,0.1470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,8,0.1512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,2,0.1511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,65536,0.4652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,131072,0.7365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,64,0.1575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,256,0.1792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,512,0.1979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,32,0.1473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,2048,0.5274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,4096,0.3124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,1024,0.2299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,128,0.1627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,65536,0.7467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,16384,0.4045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,131072,1.3073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,4,0.2373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,2,0.2262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,8,0.2261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,8192,0.3431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,16,0.2299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,32768,0.5184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,64,0.2436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,128,0.2597
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,1024,0.3568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,256,0.2671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,32,0.2408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,8192,0.5912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,16384,0.7044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,2048,0.9456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,512,0.3041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,4096,0.5423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,2,0.3959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,4,0.3956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,32768,0.9273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,16,0.3761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,8,0.3975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,65536,1.4495
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,128,0.4236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,256,0.4524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,512,0.4904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,1024,0.6061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,64,0.3977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,4096,0.9171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,32,0.3826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,2,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,32768,1.6710
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,4,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,2048,1.6515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,16384,1.2534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,8192,1.0387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,16,0.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,8,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,64,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,32,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,128,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,2048,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,16384,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,512,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,8192,0.1120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,32768,0.1468
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,4096,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,131072,0.3133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,2,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,65536,0.2112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,256,0.0941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,16,0.1019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,32,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,128,0.1063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,64,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,256,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,4,0.1017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,512,0.1048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,1024,0.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,8,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,4096,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,1024,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,16384,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,8192,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,131072,0.3243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,2,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,32768,0.1582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,2048,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,8,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,16,0.1017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,32,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,64,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,128,0.1024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,4,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,65536,0.2179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,1024,0.1047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,4096,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,256,0.1063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,2048,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,512,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,65536,0.2262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,2,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,131072,0.3402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,32768,0.1609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,16384,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,16,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,32,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,64,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,8192,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,128,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,4,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,256,0.1039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,4096,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,512,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,8,0.1000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,16384,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,8192,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,2048,0.2385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,32768,0.1722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,2,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,1024,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,8,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,16,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,65536,0.2395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,32,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,128,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,4,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,64,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,512,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,256,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,4096,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,1024,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,8192,0.1355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,2048,0.2453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,32768,0.1896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,131072,0.3563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,131072,0.3935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,65536,0.2608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,4,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,2,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,16,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,8,0.0939
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,128,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,256,0.0937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,1024,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,16384,0.1522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,4096,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,32,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,512,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,16384,0.1609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,8192,0.1447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,64,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,2048,0.2491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,131072,0.4293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,65536,0.2813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,8,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,2,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,32,0.0984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,4,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,256,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,512,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,16,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,64,0.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,32768,0.1952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,8192,0.1693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,4096,0.1616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,128,0.0987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,16384,0.1909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,2048,0.2726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,2,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,65536,0.3382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,4,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,8,0.1076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,16,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,64,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,128,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,32,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,256,0.1224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,1024,0.1532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,32768,0.2373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,131072,0.5288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,1024,0.1281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,8192,0.2214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,512,0.1393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,4096,0.2067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,65536,0.4619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,16384,0.2543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,131072,0.7387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,4,0.1470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,32768,0.3203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,2,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,32,0.1476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,2048,0.3160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,16,0.1431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,8,0.1439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,128,0.1653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,256,0.1786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,1024,0.2268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,4096,0.3071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,8192,0.3397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,64,0.1535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,65536,0.7460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,2,0.2243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,2048,0.5216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,131072,1.2975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,32768,0.5157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,16384,0.3997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,512,0.1914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,16,0.2265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,8,0.2301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,4,0.2306
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,128,0.2567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,1024,0.3558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,32,0.2265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,2048,0.9472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,512,0.3045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,4096,0.5393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,64,0.2288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,32768,0.9257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,16384,0.6947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,256,0.2621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,4,0.3888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,8192,0.5889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,32,0.3738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,64,0.3668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,65536,1.4542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,8,0.3663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,256,0.4446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,2,0.3871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,512,0.4856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,1024,0.5950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,128,0.4173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,16,0.3677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,4096,0.9284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,8192,1.0328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,2048,1.6265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,4,0.0946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,16384,1.2307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,2,0.0890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,32,0.0900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,32768,1.6783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,8,0.0890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,128,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,256,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,16,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,2048,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,64,0.0914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,8192,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,1024,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,512,0.0915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,131072,0.3042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,4096,0.1108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,65536,0.2034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,2,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,32768,0.1472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,16,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,32,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,4,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,16384,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,64,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,128,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,1024,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,4096,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,2048,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,8192,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,16384,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,65536,0.2197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,512,0.1017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,131072,0.3243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,32768,0.1620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,4,0.1028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,16,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,2,0.1003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,8,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,64,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,256,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,32,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,8,0.0997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,256,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,512,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,128,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,1024,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,2048,0.1140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,4096,0.1164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,65536,0.2215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,131072,0.3405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,2,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,32768,0.1585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,16,0.0976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,4,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,32,0.1019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,64,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,8192,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,16384,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,8,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,1024,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,2048,0.2385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,4096,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,256,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,32768,0.1701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,16384,0.1323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,128,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,8192,0.1225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,2,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,65536,0.2419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,512,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,16,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,64,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,128,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,8,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,4,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,512,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,2048,0.2423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,4096,0.1242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,256,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,1024,0.1061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,32,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,65536,0.2594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,8192,0.1374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,2,0.0923
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,131072,0.3551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,16384,0.1502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,131072,0.3915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,16,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,32768,0.1909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,128,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,4,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,256,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,1024,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,2048,0.2405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,32,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,64,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,8192,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,512,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,32768,0.1944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,4096,0.1345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,131072,0.4271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,65536,0.2792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,8,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,8,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,16,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,32,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,16384,0.1597
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,256,0.1002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,512,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,2,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,2048,0.2711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,4096,0.1584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,128,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,8192,0.1686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,16384,0.1912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,65536,0.3372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,1024,0.1241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,131072,0.5278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,64,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,2,0.1086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,8,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,32768,0.2373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,32,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,64,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,4,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,512,0.1385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,16,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,1024,0.1521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,128,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,2048,0.3148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,256,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,32768,0.3184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,65536,0.4639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,4,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,131072,0.7337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,16384,0.2529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,4096,0.2108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,2,0.1424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,8192,0.2216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,16,0.1410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,64,0.1502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,128,0.1618
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,8,0.1444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,32,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,512,0.1929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,4,0.1384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,2048,0.5233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,256,0.1745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,1024,0.2187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,32768,0.5140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,16384,0.3934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,65536,0.7397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,2,0.2166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,4096,0.3156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,4,0.2217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,8192,0.3410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,32,0.2205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,8,0.2271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,131072,1.3004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,16,0.2197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,64,0.2254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,512,0.3020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,1024,0.3510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,128,0.2470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,2048,0.9441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,256,0.2714
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,8192,0.5763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,16384,0.6855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,4096,0.5355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,32768,0.9290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,4,0.3815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,8,0.3625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,65536,1.4404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,32,0.3599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,16,0.3843
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,2,0.3690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,64,0.3850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,128,0.4117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,1024,0.5861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,512,0.4937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,8192,1.0123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,4096,0.9243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,2048,1.6182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,2,0.1688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,16384,1.2399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,4,0.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,8,0.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,256,0.4179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,32768,1.6764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,64,0.1660
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,32,0.1730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,128,0.1650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,1024,0.1846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,512,0.1800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,256,0.1770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,2048,0.1827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,8192,0.1947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,32768,0.2362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,65536,0.2885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,16384,0.1993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,131072,0.4001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,4096,0.1954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,4,0.1793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,16,0.1788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,16,0.1791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,2,0.1747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,32,0.1706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,128,0.1810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,256,0.1824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,512,0.1943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,8,0.1813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,2048,0.1871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,8192,0.2013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,1024,0.1813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,64,0.1813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,32768,0.2567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,65536,0.3031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,4096,0.1978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,16384,0.2204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,2,0.1798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,131072,0.3984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,32,0.1791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,8,0.1832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,64,0.1783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,16,0.1823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,512,0.1807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,4,0.1869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,256,0.1855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,2048,0.1893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,8192,0.2107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,16384,0.2198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,65536,0.3035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,32768,0.2510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,131072,0.4065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,2,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,4096,0.2037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,4,0.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,1024,0.1847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,32,0.1467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,128,0.1863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,64,0.1504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,128,0.1481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,8,0.1480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,256,0.1497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,16,0.1532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,4096,0.1768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,16384,0.1895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,32768,0.2180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,512,0.1545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,2048,0.2882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,8192,0.1799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,2,0.1517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,4,0.1504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,8,0.1523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,65536,0.2865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,16,0.1569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,1024,0.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,32,0.1507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,128,0.1526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,512,0.1603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,1024,0.1610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,2048,0.2902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,4096,0.1771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,64,0.1519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,16384,0.1996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,8192,0.1831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,32768,0.2279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,131072,0.4007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,131072,0.4267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,4,0.1449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,8,0.1426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,32,0.1458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,65536,0.2968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,64,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,128,0.1513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,2,0.1431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,16,0.1446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,512,0.1548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,2048,0.2875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,4096,0.1774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,256,0.1552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,16384,0.1973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,8192,0.1820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,256,0.1497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,65536,0.3154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,1024,0.1544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,4,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,131072,0.4646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,8,0.1511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,2,0.1525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,32,0.1518
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,128,0.1544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,64,0.1527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,256,0.1559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,1024,0.1603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,512,0.1561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,4096,0.1884
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,16,0.1520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,2048,0.2983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,32768,0.2345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,65536,0.3654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,131072,0.5583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,16384,0.2186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,8192,0.1974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,8,0.1827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,2,0.1823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,32768,0.2651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,32,0.1832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,4,0.1801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,64,0.1825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,16,0.1797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,1024,0.1969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,256,0.1896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,512,0.1901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,2048,0.3470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,16384,0.2818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,32768,0.3490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,8192,0.2541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,65536,0.4935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,2,0.2506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,128,0.1841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,8,0.2466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,16,0.2487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,4,0.2436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,131072,0.7654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,4096,0.2349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,64,0.2527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,256,0.2546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,32,0.2493
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,128,0.2522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,1024,0.2807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,4096,0.3512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,8192,0.3810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,16384,0.4315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,2048,0.5585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,512,0.2651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,65536,0.7846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,2,0.3728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,16,0.3656
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,4,0.3744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,131072,1.3362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,32768,0.5540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,128,0.3799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,8,0.3641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,64,0.3746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,256,0.3791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,1024,0.4351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,512,0.4045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,32,0.3652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,8192,0.6060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,2048,0.9767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,4096,0.5645
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,4,0.6715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,2,0.6869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,8,0.6654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,65536,1.4733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,16384,0.7298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,16,0.6834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,32,0.6860
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,32768,0.9468
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,64,0.6735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,256,0.6999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,512,0.7292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,1024,0.8187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,4096,1.0307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,8192,1.1239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,2048,1.7528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,4,0.1368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,2,0.1373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,8,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,32768,1.7601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,16384,1.3305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,16,0.1399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,64,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,32,0.1355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,128,0.7004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,128,0.1386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,512,0.1389
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,4096,0.1630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,1024,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,256,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,32768,0.1969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,65536,0.2523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,2,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,2048,0.2732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,131072,0.3622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,8192,0.1629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,32,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,4,0.1486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,64,0.1538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,16,0.1492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,128,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,256,0.1450
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,512,0.1436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,8,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,16384,0.1771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,2048,0.2836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,32768,0.2025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,16384,0.1786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,65536,0.2649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,4096,0.1708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,1024,0.1450
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,131072,0.3823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,8,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,8192,0.1807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,16,0.1464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,4,0.1449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,64,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,256,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,128,0.1530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,2,0.1536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,1024,0.1505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,32,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,4096,0.1698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,16384,0.1873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,32768,0.2153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,65536,0.2701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,8192,0.1711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,512,0.1490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,2,0.1395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,2048,0.2792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,16,0.1487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,8,0.1455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,32,0.1408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,131072,0.3766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,4,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,1024,0.1520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,64,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,128,0.1406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,2048,0.2769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,4096,0.1644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,256,0.1399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,512,0.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,16384,0.1837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,65536,0.2686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,131072,0.3889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,4,0.1344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,2,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,8192,0.1793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,16,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,32,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,64,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,256,0.1384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,32768,0.2137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,512,0.1397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,8,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,128,0.1326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,1024,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,8192,0.1691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,4096,0.1596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,131072,0.4093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,2048,0.2715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,2,0.1224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,65536,0.2816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,4,0.1234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,16,0.1198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,64,0.1220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,32768,0.2123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,8,0.1212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,32,0.1200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,1024,0.1342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,128,0.1227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,512,0.1294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,2048,0.2776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,256,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,4096,0.1622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,16384,0.1811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,8192,0.1672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,32768,0.2205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,131072,0.4516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,16384,0.1846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,8,0.1295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,65536,0.3035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,16,0.1291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,32,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,4,0.1295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,256,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,2,0.1273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,128,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,2048,0.3114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,64,0.1304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,4096,0.1951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,8192,0.2066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,16384,0.2246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,65536,0.3749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,32768,0.2740
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,2,0.1525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,131072,0.5655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,4,0.1516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,16,0.1493
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,512,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,1024,0.1535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,32,0.1520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,64,0.1510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,8,0.1492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,512,0.1748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,4096,0.2703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,1024,0.2002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,16384,0.3098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,128,0.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,2048,0.3759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,65536,0.5233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,32768,0.3792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,2,0.2054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,131072,0.7998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,8,0.2092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,256,0.1662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,32,0.2041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,16,0.2077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,64,0.2040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,4,0.2097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,256,0.2346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,8192,0.2844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,1024,0.2901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,512,0.2548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,8192,0.4350
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,4096,0.4027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,2048,0.6135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,32768,0.6076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,65536,0.8342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,2,0.3163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,128,0.2218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,8,0.3057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,131072,1.3970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,16,0.3162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,16384,0.4916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,32,0.3060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,256,0.3530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,4,0.3125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,64,0.3159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,2048,1.1244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,1024,0.4940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,4096,0.7027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,8192,0.7540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,512,0.4080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,32768,1.0985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,2,0.5358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,128,0.3407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,8,0.5168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,16,0.5354
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,65536,1.6290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,4,0.5152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,32,0.5226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,16384,0.8734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,64,0.5359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,512,0.7172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,1024,0.8764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,128,0.5841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,8192,1.4062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,2048,2.0207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,4096,1.2880
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,16384,1.6165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,4,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,2,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,16,0.1124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,8,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,32768,2.0489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,256,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,512,0.1161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,256,0.6101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,128,0.1141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,2048,0.2469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,1024,0.1182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,4096,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,16384,0.1467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,32768,0.1706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,131072,0.3433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,8192,0.1370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,2,0.1219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,4,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,65536,0.2301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,16,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,32,0.1292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,64,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,128,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,256,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,8,0.1284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,1024,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,512,0.1282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,2048,0.2576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,4096,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,8192,0.1478
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,16384,0.1630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,65536,0.2504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,131072,0.3461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,4,0.1230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,32768,0.1880
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,8,0.1272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,32,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,2,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,16,0.1247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,256,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,512,0.1248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,1024,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,128,0.1273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,2048,0.2607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,64,0.1227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,8192,0.1526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,4096,0.1498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,32768,0.1919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,16384,0.1626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,131072,0.3574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,2,0.1167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,4,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,65536,0.2552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,8,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,64,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,32,0.1189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,16,0.1206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,256,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,128,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,512,0.1260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,2048,0.2578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,1024,0.1251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,8192,0.1527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,4096,0.1445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,131072,0.3719
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,65536,0.2588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,16,0.1248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,2,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,8,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,32,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,4,0.1234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,64,0.1252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,128,0.1266
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,256,0.1239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,1024,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,2048,0.2650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,4096,0.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,512,0.1309
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,8192,0.1579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,32768,0.1981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,16384,0.1688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,65536,0.2702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,2,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,4,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,8,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,16,0.1047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,32,0.1040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,131072,0.3998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,128,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,256,0.1101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,512,0.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,1024,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,2048,0.2585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,4096,0.1446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,8192,0.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,64,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,32768,0.2006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,16384,0.1645
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,2,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,131072,0.4338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,8,0.1106
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,16,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,4,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,32,0.1110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,65536,0.2855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,128,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,256,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,512,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,64,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,1024,0.1287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,8192,0.1682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,4096,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,2048,0.2729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,16384,0.1895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,32768,0.2376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,65536,0.3397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,32768,0.1984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,2,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,131072,0.5279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,8,0.1305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,16,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,4,0.1291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,64,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,128,0.1456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,16384,0.1665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,256,0.1377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,2048,0.3159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,512,0.1487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,32,0.1283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,16384,0.2535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,32768,0.3177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,1024,0.1588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,4096,0.2058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,131072,0.7336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,4,0.1787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,65536,0.4629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,8192,0.2201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,16,0.1733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,32,0.1776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,128,0.1882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,64,0.1794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,256,0.1900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,8,0.1735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,2048,0.5203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,4096,0.3081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,32768,0.5071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,2,0.1750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,8192,0.3367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,1024,0.2252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,512,0.2009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,65536,0.7432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,16384,0.3913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,131072,1.3022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,16,0.2610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,8,0.2713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,4,0.2594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,2,0.2592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,128,0.2785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,256,0.2849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,32,0.2601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,64,0.2698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,512,0.3196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,2048,0.9120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,8192,0.5552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,4096,0.5022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,1024,0.3676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,16384,0.6640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,8,0.4285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,65536,1.4043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,16,0.4275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,32768,0.8924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,32,0.4473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,4,0.4268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,64,0.4282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,128,0.4602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,512,0.5483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,1024,0.6227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,2048,1.6045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,2,0.4475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,8192,0.9901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,256,0.4790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,16384,1.2053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,4096,0.9062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,8,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,32768,1.6585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,32,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,16,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,64,0.1062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,2,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,256,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,512,0.1060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,4,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,128,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,2048,0.2375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,8192,0.1288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,1024,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,16384,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,32768,0.1595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,2,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,4,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,4096,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,16,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,65536,0.2257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,131072,0.3229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,64,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,256,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,512,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,1024,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,2048,0.2446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,8192,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,32,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,16384,0.1460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,32768,0.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,131072,0.3408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,2,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,4096,0.1300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,4,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,65536,0.2299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,8,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,128,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,128,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,16,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,8,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,1024,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,2048,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,512,0.1158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,256,0.1143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,8192,0.1396
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,4096,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,16384,0.1487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,32768,0.1798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,131072,0.3503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,65536,0.2372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,64,0.1139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,4,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,16,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,2,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,128,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,8,0.1101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,32,0.1090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,512,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,64,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,2048,0.2460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,4096,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,16384,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,32,0.1139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,65536,0.2464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,8192,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,32768,0.1774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,2,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,1024,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,4,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,32,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,256,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,64,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,131072,0.3635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,512,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,128,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,256,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,2048,0.2527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,4096,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,1024,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,16,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,32768,0.1894
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,8,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,65536,0.2640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,2,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,16384,0.1622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,16,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,131072,0.3931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,4,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,32,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,8,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,8192,0.1457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,256,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,512,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,64,0.1005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,128,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,4096,0.1331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,2048,0.2453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,65536,0.2764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,8192,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,32768,0.1936
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,1024,0.1103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,2,0.1026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,16,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,16384,0.1583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,8,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,64,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,128,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,32,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,512,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,1024,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,2048,0.2587
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,8192,0.1574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,16384,0.1765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,131072,0.4246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,4096,0.1456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,32768,0.2313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,4,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,131072,0.5169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,256,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,8,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,65536,0.3275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,32,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,2,0.1179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,4,0.1119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,64,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,512,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,256,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,2048,0.2854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,1024,0.1392
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,16,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,4096,0.1744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,32768,0.2904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,16384,0.2245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,4,0.1522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,2,0.1517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,131072,0.7027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,65536,0.4318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,16,0.1587
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,8192,0.1948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,64,0.1532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,32,0.1526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,128,0.1185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,128,0.1628
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,512,0.1824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,256,0.1726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,2048,0.4636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,8192,0.2857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,16384,0.3377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,8,0.1542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,4096,0.2527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,32768,0.4615
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,2,0.2426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,65536,0.6888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,16,0.2333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,1024,0.1991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,4,0.2335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,131072,1.2399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,64,0.2446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,128,0.2454
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,32,0.2431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,512,0.2798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,1024,0.3023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,4096,0.4307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,8192,0.4721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,16384,0.5938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,8,0.2303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,2048,0.8310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,2,0.4024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,4,0.3813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,8,0.4022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,32768,0.8239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,64,0.4007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,16,0.3834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,32,0.3833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,65536,1.3391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,256,0.2530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,256,0.4357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,1024,0.5270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,512,0.4661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,128,0.4005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,4096,0.7127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,2048,1.4305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,8,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,8192,0.8190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,16,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,32768,1.4798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,2,0.0965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,64,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,32,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,4,0.0981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,128,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,512,0.0985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,2048,0.2249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,16384,1.0310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,8192,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,256,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,1024,0.0981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,16384,0.1275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,32768,0.1558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,4096,0.1163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,131072,0.3162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,65536,0.2085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,2,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,16,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,64,0.1119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,8,0.1090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,128,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,1024,0.1098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,32,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,512,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,4,0.1119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,16384,0.1393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,256,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,4096,0.1269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,32768,0.1634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,2048,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,131072,0.3346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,4,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,2,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,32,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,8,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,64,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,256,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,65536,0.2278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,8192,0.1280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,1024,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,512,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,16,0.1079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,16384,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,128,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,32768,0.1684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,65536,0.2355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,2,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,131072,0.3418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,4,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,8,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,32,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,16,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,8192,0.1343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,64,0.1056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,128,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,256,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,1024,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,512,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,4096,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,4096,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,8192,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,32768,0.1775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,65536,0.2385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,16384,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,2048,0.2415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,4,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,131072,0.3572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,2048,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,32,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,64,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,256,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,128,0.1123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,8,0.1127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,16,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,4096,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,2048,0.2457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,16384,0.1584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,8192,0.1405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,2,0.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,512,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,65536,0.2607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,32768,0.1857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,1024,0.1147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,131072,0.3894
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,32,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,4,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,8,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,128,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,256,0.0981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,16,0.0978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,512,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,2,0.0976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,64,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,16384,0.1523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,4096,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,8192,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,131072,0.4193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,1024,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,2,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,4,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,16,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,8,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,32,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,64,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,128,0.1004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,512,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,256,0.1021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,1024,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,32768,0.1895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,2048,0.2528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,8192,0.1535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,4096,0.1433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,65536,0.2714
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,65536,0.3227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,131072,0.5101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,4,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,32768,0.2223
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,8,0.1104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,2,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,16,0.1098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,64,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,16384,0.1726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,32,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,256,0.1164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,512,0.1291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,4096,0.1722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,2048,0.2366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,128,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,1024,0.1369
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,65536,0.4285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,2048,0.2833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,8192,0.1887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,16384,0.2193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,4,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,32768,0.2904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,16,0.1502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,32,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,128,0.1522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,131072,0.7018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,8,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,2,0.1460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,1024,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,512,0.1781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,256,0.1691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,64,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,16384,0.3344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,32768,0.4485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,2048,0.4578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,8192,0.2791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,2,0.2214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,4,0.2239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,65536,0.6849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,131072,1.2433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,32,0.2324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,16,0.2219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,8,0.2203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,128,0.2381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,64,0.2261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,4096,0.2473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,2048,0.8238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,4096,0.4108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,1024,0.3033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,512,0.2716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,8192,0.4649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,16384,0.5726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,32768,0.8039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,8,0.3839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,65536,1.3269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,2,0.3622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,32,0.3641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,256,0.2558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,4,0.3855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,64,0.3623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,256,0.3956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,1024,0.4854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,16,0.3652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,512,0.4271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,4096,0.6906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,2048,1.4296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,8192,0.8177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,4,0.0935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,16384,1.0087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,2,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,16,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,128,0.4033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,32,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,128,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,8,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,1024,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,32768,1.4399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,512,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,2048,0.1047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,8192,0.1120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,256,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,32768,0.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,4096,0.1122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,2,0.1021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,131072,0.3104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,64,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,8,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,32,0.1026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,4,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,65536,0.2099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,16384,0.1245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,16,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,1024,0.1044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,512,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,2048,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,64,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,8192,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,128,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,65536,0.2224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,16384,0.1341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,4096,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,131072,0.3244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,8,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,256,0.1063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,2,0.1057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,32,0.1048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,16,0.1048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,128,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,32768,0.1584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,64,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,256,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,1024,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,4,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,8192,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,32768,0.1649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,2048,0.2371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,65536,0.2239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,4096,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,4,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,8,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,131072,0.3412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,2,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,16,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,64,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,128,0.1025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,32,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,256,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,1024,0.1044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,512,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,4096,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,16384,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,2048,0.2375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,16384,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,512,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,131072,0.3536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,4,0.1064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,65536,0.2391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,8192,0.1272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,32768,0.1629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,16,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,64,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,256,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,512,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,32,0.1040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,2,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,4096,0.1254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,8192,0.1326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,1024,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,2048,0.2420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,16384,0.1467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,65536,0.2592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,131072,0.3840
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,2,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,128,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,16,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,8,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,8,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,32,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,32768,0.1815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,4,0.0936
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,1024,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,512,0.0981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,2048,0.2323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,64,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,4096,0.1204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,8192,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,256,0.0942
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,131072,0.4151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,32768,0.1851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,65536,0.2674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,128,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,16384,0.1487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,16,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,2,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,8,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,128,0.0978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,256,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,32,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,512,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,1024,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,8192,0.1497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,4096,0.1404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,64,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,16384,0.1693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,65536,0.3195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,131072,0.5092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,2048,0.2503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,4,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,8,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,16,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,2,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,4,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,32768,0.2173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,256,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,64,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,2048,0.2795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,512,0.1242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,128,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,16384,0.2185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,1024,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,32768,0.2846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,2,0.1392
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,131072,0.6992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,4096,0.1737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,65536,0.4287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,32,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,8192,0.1889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,8,0.1399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,4,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,128,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,32,0.1425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,256,0.1586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,64,0.1485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,1024,0.1875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,512,0.1708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,16,0.1398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,8192,0.2737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,32768,0.4529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,4096,0.2484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,65536,0.6773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,4,0.2144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,2,0.2176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,8,0.2275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,2048,0.4626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,16,0.2276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,64,0.2205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,256,0.2523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,32,0.2176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,512,0.2681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,1024,0.2999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,16384,0.3279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,131072,1.2400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,128,0.2330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,4096,0.4177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,8192,0.4575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,2048,0.8183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,2,0.3590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,16384,0.5771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,32768,0.8067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,4,0.3574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,32,0.3575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,16,0.3564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,128,0.3959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,65536,1.3230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,256,0.4119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,64,0.3593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,8,0.3777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,1024,0.4791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,512,0.4199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,8192,0.7890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,2048,1.4008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,16384,1.0228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,8,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,4,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,32768,1.4345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,4096,0.7022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,2,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,64,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,32,0.0898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,256,0.0965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,1024,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,16,0.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,8192,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,4096,0.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,16384,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,32768,0.1534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,65536,0.2094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,2048,0.2261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,4,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,128,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,512,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,16,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,2,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,128,0.1026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,256,0.1026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,131072,0.3041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,1024,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,512,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,32,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,8,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,64,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,2048,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,8192,0.1232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,131072,0.3249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,65536,0.2185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,32768,0.1613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,4,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,16,0.1090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,8,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,32,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,2,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,16384,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,256,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,512,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,4096,0.1183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,64,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,2048,0.2372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,8192,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,4096,0.1190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,16384,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,65536,0.2240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,2,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,32768,0.1600
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,128,0.1020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,16,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,1024,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,64,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,32,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,256,0.1044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,4,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,1024,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,8,0.1039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,131072,0.3388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,8192,0.1226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,4096,0.1181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,128,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,65536,0.2364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,131072,0.3512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,512,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,16384,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,2048,0.2328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,2,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,32,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,64,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,128,0.1018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,8,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,32768,0.1674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,1024,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,4,0.1018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,256,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,16,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,512,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,16384,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,32768,0.1792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,2048,0.2357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,2,0.0913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,65536,0.2525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,131072,0.3794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,4096,0.1227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,4,0.0912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,32,0.0919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,16,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,128,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,512,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,8,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,8192,0.1301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,256,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,2048,0.2290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,1024,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,16384,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,65536,0.2662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,131072,0.4146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,4096,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,32768,0.1817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,64,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,8,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,16,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,8192,0.1310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,32,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,4,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,64,0.0934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,2,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,512,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,128,0.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,1024,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,4096,0.1388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,2048,0.2500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,32768,0.2136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,16384,0.1687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,65536,0.3171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,2,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,256,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,4,0.1065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,8192,0.1461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,16,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,8,0.1043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,131072,0.5069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,256,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,128,0.1109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,512,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,2048,0.2773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,1024,0.1359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,16384,0.2180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,4096,0.1694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,65536,0.4244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,64,0.1062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,8192,0.1871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,2,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,4,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,32,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,32,0.1427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,8,0.1376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,131072,0.6991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,64,0.1439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,512,0.1735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,32768,0.2824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,128,0.1460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,256,0.1576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,16,0.1388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,4096,0.2465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,16384,0.3254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,1024,0.1829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,32768,0.4452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,8192,0.2760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,2048,0.4585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,4,0.2116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,131072,1.2368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,16,0.2106
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,2,0.2201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,65536,0.6761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,32,0.2231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,8,0.2136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,256,0.2392
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,128,0.2275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,512,0.2533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,1024,0.2836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,2048,0.8225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,8192,0.4544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,16384,0.5650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,64,0.2279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,4,0.3685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,65536,1.3191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,4096,0.4117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,32768,0.7933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,2,0.3491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,8,0.3498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,32,0.3685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,128,0.3684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,16,0.3494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,256,0.4027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,2048,1.3942
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,512,0.4100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,8192,0.7792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,64,0.3696
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,16384,0.9920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,4,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,2,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,1024,0.4674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,8,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,16,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,64,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,256,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,512,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,1024,0.0982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,32,0.0955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,4096,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,8192,0.1129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,128,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,16384,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,2048,0.1020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,32768,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,4,0.1028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,32768,1.4438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,131072,0.3080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,4096,0.6939
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,65536,0.2031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,32,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,2,0.1006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,8,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,64,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,16,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,2048,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,4096,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,32768,0.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,8192,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,512,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,128,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,256,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,131072,0.3205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,4,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,16384,0.1341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,8,0.0976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,32,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,16,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,64,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,1024,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,256,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,128,0.1060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,2,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,65536,0.2208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,1024,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,4096,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,512,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,32768,0.1621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,16384,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,8192,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,65536,0.2218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,4,0.0974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,2,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,16,0.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,32,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,64,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,8,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,131072,0.3297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,256,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,2048,0.2342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,2048,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,1024,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,512,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,32768,0.1627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,65536,0.2322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,16384,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,2,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,128,0.0986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,4096,0.1187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,16,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,32,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,64,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,8,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,131072,0.3524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,256,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,8192,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,4,0.1027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,128,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,4096,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,1024,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,2048,0.2357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,16384,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,8192,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,2,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,32768,0.1803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,512,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,4,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,65536,0.2509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,8,0.0897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,32,0.0903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,128,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,64,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,16,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,131072,0.3807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,4096,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,256,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,512,0.0933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,2048,0.2277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,32768,0.1825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,1024,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,131072,0.4133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,4,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,2,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,16,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,8,0.0939
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,64,0.0937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,32,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,16384,0.1447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,8192,0.1252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,128,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,256,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,1024,0.1040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,4096,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,2048,0.2485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,32768,0.2132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,65536,0.3163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,8192,0.1464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,16384,0.1668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,4,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,512,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,16,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,2,0.1066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,32,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,64,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,128,0.1099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,65536,0.2667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,512,0.1192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,256,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,2048,0.2753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,4096,0.1705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,16384,0.2170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,131072,0.5068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,8,0.1047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,32768,0.2823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,2,0.1363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,131072,0.6976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,8,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,65536,0.4235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,4,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,8192,0.1862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,1024,0.1323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,64,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,256,0.1523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,16,0.1362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,128,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,2048,0.4517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,32,0.1379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,512,0.1711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,16384,0.3258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,65536,0.6743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,4096,0.2446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,2,0.2163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,8192,0.2660
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,1024,0.1827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,32768,0.4431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,8,0.2074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,32,0.2176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,131072,1.2338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,16,0.2179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,4,0.2082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,64,0.2119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,256,0.2344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,1024,0.2910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,4096,0.4081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,8192,0.4504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,2048,0.8216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,512,0.2500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,128,0.2273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,4,0.3451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,16384,0.5608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,2,0.3643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,65536,1.3241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,16,0.3437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,64,0.3454
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,8,0.3651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,128,0.3635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,256,0.3973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,1024,0.4829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,2048,1.3874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,4096,0.6883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,32,0.3452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,32768,0.7907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,16384,1.0061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,2,0.1747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,4,0.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,8,0.1664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,512,0.4039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,32,0.1731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,16,0.1748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,256,0.1710
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,64,0.1746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,32768,1.4390
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,8192,0.7737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,1024,0.1811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,128,0.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,512,0.1726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,2048,0.1957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,4096,0.1928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,16384,0.2145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,2,0.1760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,4,0.1908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,8192,0.2071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,65536,0.2996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,32768,0.2286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,8,0.1796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,131072,0.3984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,16,0.1811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,128,0.1813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,64,0.1879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,1024,0.1903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,32,0.1744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,256,0.1864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,2048,0.1906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,16384,0.2271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,512,0.1815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,65536,0.2980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,8192,0.2138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,32768,0.2425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,4,0.1850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,131072,0.4085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,16,0.1750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,4096,0.2125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,32,0.1753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,64,0.1792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,256,0.1958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,1024,0.1975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,128,0.1934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,2048,0.1980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,2,0.1727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,8192,0.2186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,16384,0.2251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,32768,0.2510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,4096,0.2065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,131072,0.4046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,2,0.1532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,65536,0.3025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,4,0.1507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,16,0.1532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,32,0.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,64,0.1481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,8,0.1522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,256,0.1627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,512,0.1655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,512,0.1863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,128,0.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,4096,0.1814
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,1024,0.1711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,2048,0.2918
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,16384,0.1998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,32768,0.2256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,65536,0.2851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,131072,0.4148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,8,0.1835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,8,0.1546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,2,0.1589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,8192,0.1882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,64,0.1568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,16,0.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,256,0.1608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,32,0.1536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,1024,0.1677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,4,0.1532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,4096,0.1874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,8192,0.1924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,512,0.1695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,16384,0.2042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,2048,0.3008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,128,0.1600
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,131072,0.4332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,4,0.1504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,65536,0.3047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,32768,0.2353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,2,0.1486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,32,0.1488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,8,0.1470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,16,0.1486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,64,0.1480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,512,0.1567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,2048,0.3012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,256,0.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,128,0.1520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,8192,0.1941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,16384,0.2078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,131072,0.4745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,2,0.1560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,32768,0.2443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,4,0.1555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,8,0.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,16,0.1548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,1024,0.1620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,65536,0.3262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,128,0.1593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,32,0.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,256,0.1610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,4096,0.1863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,64,0.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,2048,0.3210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,4096,0.2074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,8192,0.2196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,32768,0.2861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,65536,0.3909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,1024,0.1757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,2,0.1806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,512,0.1672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,8,0.1817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,16384,0.2399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,131072,0.5785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,32,0.1802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,64,0.1808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,256,0.1910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,4,0.1816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,16,0.1839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,2048,0.3832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,128,0.1885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,4096,0.2739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,512,0.2019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,16384,0.3211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,8192,0.2907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,32768,0.3908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,1024,0.2187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,4,0.2437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,2,0.2444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,16,0.2482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,8,0.2442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,64,0.2488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,128,0.2545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,256,0.2648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,512,0.2859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,131072,0.8025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,32,0.2436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,4096,0.4114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,1024,0.3232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,65536,0.5328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,2048,0.6321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,8192,0.4427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,16384,0.5030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,65536,0.8522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,4,0.3554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,2,0.3501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,32768,0.6171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,8,0.3508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,32,0.3610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,64,0.3582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,256,0.3952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,131072,1.4048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,128,0.3745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,1024,0.4966
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,4096,0.6747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,16384,0.8311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,512,0.4250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,32768,1.0647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,2048,1.0913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,8192,0.7286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,2,0.6483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,16,0.3555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,32,0.6501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,65536,1.5907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,8,0.6285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,4,0.6303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,64,0.6499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,128,0.6808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,512,0.7844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,1024,0.9107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,256,0.6947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,4096,1.2285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,16,0.6269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,2048,1.9511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,2,0.1343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,4,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,32768,2.0008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,8,0.1324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,8192,1.3531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,16,0.1379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,16384,1.5682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,64,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,32,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,512,0.1374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,256,0.1364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,1024,0.1439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,128,0.1445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,2048,0.2809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,16384,0.1759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,4096,0.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,8192,0.1631
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,2,0.1519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,32768,0.1981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,131072,0.3672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,4,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,8,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,64,0.1516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,65536,0.2544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,16,0.1439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,128,0.1446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,256,0.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,4096,0.1689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,512,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,8192,0.1757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,16384,0.1841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,65536,0.2668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,2048,0.2895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,32768,0.2220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,1024,0.1548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,4,0.1458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,8,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,32,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,32,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,2,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,256,0.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,131072,0.3807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,128,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,64,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,1024,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,2048,0.2892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,4096,0.1754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,16384,0.1915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,8192,0.1851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,65536,0.2811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,131072,0.3861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,32768,0.2173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,512,0.1510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,8,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,4,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,2,0.1412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,32,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,64,0.1444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,16,0.1490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,128,0.1460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,1024,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,512,0.1527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,256,0.1533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,4096,0.1772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,16384,0.1905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,2048,0.2886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,65536,0.2809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,131072,0.4006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,32768,0.2169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,8192,0.1841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,4,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,2,0.1332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,16,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,8,0.1330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,128,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,16,0.1500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,1024,0.1506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,32,0.1310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,64,0.1352
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,512,0.1466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,8192,0.1907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,256,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,32768,0.2246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,2048,0.2911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,65536,0.2976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,4096,0.1790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,131072,0.4295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,16384,0.1942
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,8,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,32,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,2,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,64,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,4,0.1232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,128,0.1293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,512,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,1024,0.1487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,2048,0.2994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,256,0.1288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,16384,0.2082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,4096,0.1848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,131072,0.4767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,65536,0.3261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,8192,0.1917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,32768,0.2420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,4,0.1322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,16,0.1328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,8,0.1330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,64,0.1330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,32,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,128,0.1398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,2,0.1329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,1024,0.1879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,512,0.1609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,16,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,8192,0.2576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,2048,0.3623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,32768,0.3303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,65536,0.4296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,256,0.1475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,131072,0.6193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,2,0.1571
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,16384,0.2810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,8,0.1582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,32,0.1553
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,16,0.1554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,64,0.1528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,256,0.1828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,4,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,512,0.2059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,2048,0.4650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,4096,0.3557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,4096,0.2504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,8192,0.3798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,1024,0.2542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,32768,0.4718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,16384,0.4122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,131072,0.8826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,4,0.2204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,8,0.2657
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,128,0.1675
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,16,0.2197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,64,0.2187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,128,0.2407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,256,0.2586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,32,0.2149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,1024,0.4006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,2048,0.8039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,512,0.3033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,4096,0.5892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,16384,0.6695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,32768,0.7948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,2,0.3375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,65536,1.0225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,131072,1.5730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,4,0.3252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,65536,0.6129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,8,0.3360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,8192,0.6145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,16,0.3374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,32,0.3453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,64,0.3363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,256,0.4251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,1024,0.6899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,512,0.5027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,128,0.3685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,8192,1.1285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,16384,1.2217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,2,0.2153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,2048,1.4899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,2,0.5487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,4,0.5695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,32768,1.4591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,8,0.5504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,16,0.5691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,32,0.5710
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,4096,1.0506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,64,0.5505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,128,0.6559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,256,0.7273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,65536,1.9573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,1024,1.2719
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,4096,2.0074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,2048,2.6917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,8192,2.0684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,512,0.9085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,2,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,16384,2.3291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,4,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,32,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,64,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,32768,2.7476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,8,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,16,0.1126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,256,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,1024,0.1208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,512,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,128,0.1118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,16384,0.1484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,8192,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,32768,0.1706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,131072,0.3429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,2,0.1211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,4,0.1284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,8,0.1211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,16,0.1280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,32,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,128,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,64,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,256,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,512,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,65536,0.2367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,1024,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,2048,0.2666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,4096,0.1461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,8192,0.1464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,32768,0.1901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,131072,0.3582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,65536,0.2552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,2048,0.2541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,4,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,4096,0.1365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,16,0.1292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,32,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,8,0.1270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,256,0.1243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,64,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,2,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,1024,0.1364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,2048,0.2669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,512,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,4096,0.1505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,16384,0.1630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,32768,0.1995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,16384,0.1639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,8192,0.1605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,2,0.1188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,4,0.1249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,65536,0.2597
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,8,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,32,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,64,0.1192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,131072,0.3681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,512,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,256,0.1281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,1024,0.1301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,16,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,2048,0.2633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,8192,0.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,128,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,4096,0.1565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,65536,0.2669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,32768,0.2011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,131072,0.3788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,16384,0.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,4,0.1234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,128,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,32,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,2,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,8,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,64,0.1233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,128,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,1024,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,16,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,2048,0.2767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,256,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,8192,0.1685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,512,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,32768,0.2119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,16384,0.1821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,2,0.1061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,65536,0.2854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,131072,0.4110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,8,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,4096,0.1664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,32,0.1063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,64,0.1080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,4,0.1076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,256,0.1192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,16,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,2048,0.2740
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,512,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,128,0.1127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,4096,0.1607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,16384,0.1805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,1024,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,131072,0.4490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,8192,0.1676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,4,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,8,0.1090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,65536,0.3013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,2,0.1127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,64,0.1137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,32768,0.2164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,128,0.1227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,16,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,512,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,2048,0.2992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,32,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,8192,0.1971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,16384,0.2191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,256,0.1241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,32768,0.2644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,1024,0.1445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,65536,0.3673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,4,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,8,0.1300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,131072,0.5556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,4096,0.1864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,16,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,64,0.1359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,256,0.1498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,512,0.1650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,32,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,2048,0.3718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,8192,0.2785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,2,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,16384,0.3140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,4096,0.2666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,65536,0.5213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,2,0.1808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,1024,0.1959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,131072,0.7895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,8,0.1830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,128,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,32,0.1814
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,16,0.1803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,64,0.1809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,256,0.2118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,512,0.2381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,32768,0.3773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,4096,0.4039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,1024,0.2863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,16384,0.4941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,128,0.1954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,4,0.1848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,8192,0.4406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,2048,0.6150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,4,0.2805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,8,0.2818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,131072,1.3971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,2,0.2707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,65536,0.8372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,64,0.2812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,32768,0.6116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,32,0.2712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,256,0.3192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,16,0.2818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,512,0.3722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,128,0.3043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,4096,0.6994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,16384,0.8635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,1024,0.4671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,8192,0.7513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,2048,1.1203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,65536,1.6154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,32768,1.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,16,0.4464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,8,0.4502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,32,0.4479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,4,0.4485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,64,0.4653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,512,0.6425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,256,0.5445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,128,0.5133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,1024,0.8336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,2048,1.9909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,4096,1.2735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,2,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,4,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,32768,2.0243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,8,0.1056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,16384,1.5801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,16,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,2,0.4681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,64,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,8192,1.3853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,256,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,1024,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,512,0.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,128,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,4096,0.1212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,32,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,8192,0.1288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,16384,0.1354
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,131072,0.3251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,65536,0.2244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,2048,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,4,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,8,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,32768,0.1643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,16,0.1117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,32,0.1126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,64,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,128,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,512,0.1160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,256,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,1024,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,4096,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,2,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,2048,0.2456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,16384,0.1505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,131072,0.3458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,32768,0.1757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,2,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,8,0.1147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,65536,0.2299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,16,0.1183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,32,0.1170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,4,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,64,0.1162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,8192,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,512,0.1142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,1024,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,4096,0.1324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,8192,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,16384,0.1460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,128,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,32768,0.1759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,256,0.1163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,131072,0.3534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,65536,0.2489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,8,0.1104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,16,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,2048,0.2513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,32,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,128,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,64,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,4,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,2,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,2048,0.2506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,512,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,1024,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,256,0.1119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,32768,0.1898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,8192,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,131072,0.3678
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,16384,0.1504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,2,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,4,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,16,0.1128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,8,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,64,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,32,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,256,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,4096,0.1363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,512,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,65536,0.2515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,1024,0.1220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,8192,0.1572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,16384,0.1718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,32768,0.2006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,65536,0.2733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,131072,0.4025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,2048,0.2546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,4096,0.1448
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,2,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,8,0.1020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,64,0.1003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,32,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,128,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,4,0.1019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,256,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,1024,0.1209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,16,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,4096,0.1500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,2048,0.2616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,128,0.1165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,8192,0.1564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,512,0.1120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,65536,0.2904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,2,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,131072,0.4385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,8,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,16,0.1041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,32768,0.2075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,64,0.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,128,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,16384,0.1716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,512,0.1225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,1024,0.1336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,4,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,2048,0.2815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,8192,0.1802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,16384,0.1988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,256,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,4096,0.1680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,32768,0.2432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,32,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,2,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,65536,0.3482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,131072,0.5376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,4,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,64,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,128,0.1245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,256,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,16,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,512,0.1477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,8,0.1148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,2048,0.3218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,8192,0.2291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,4096,0.2116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,32,0.1137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,65536,0.4699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,16384,0.2586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,2,0.1568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,4,0.1575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,8,0.1558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,131072,0.7412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,16,0.1594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,64,0.1622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,128,0.1740
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,32,0.1583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,256,0.1829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,32768,0.3276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,4096,0.3186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,1024,0.1641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,2048,0.5334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,8192,0.3474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,512,0.2041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,32768,0.5273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,1024,0.2374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,65536,0.7530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,16384,0.4045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,2,0.2384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,4,0.2385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,32,0.2517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,8,0.2416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,131072,1.3135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,128,0.2591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,512,0.3105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,256,0.2769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,64,0.2421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,1024,0.3803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,8192,0.5979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,16,0.2509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,2048,0.9580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,16384,0.7062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,4096,0.5586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,4,0.3938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,65536,1.4633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,16,0.3941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,32768,0.9380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,32,0.3938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,8,0.4147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,256,0.4545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,2,0.4133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,512,0.5150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,64,0.4145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,1024,0.6329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,4096,0.9689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,128,0.4249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,4,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,16384,1.2803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,32768,1.6985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,2048,1.6554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,2,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,8192,1.0462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,8,0.0965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,16,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,32,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,256,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,128,0.0959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,2048,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,512,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,8192,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,1024,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,4096,0.1158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,16384,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,65536,0.2115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,64,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,131072,0.3232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,32768,0.1518
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,16,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,8,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,64,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,2,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,128,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,256,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,1024,0.1122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,4096,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,8192,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,4,0.1088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,2048,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,32,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,65536,0.2290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,2,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,16384,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,8,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,32768,0.1683
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,4,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,64,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,131072,0.3328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,16,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,256,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,512,0.1105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,512,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,2048,0.2464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,32,0.1109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,8192,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,128,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,32768,0.1718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,65536,0.2345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,131072,0.3514
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,4,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,2,0.1109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,8,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,4096,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,64,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,16384,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,16,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,1024,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,512,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,1024,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,32,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,8192,0.1331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,2048,0.2425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,256,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,65536,0.2493
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,131072,0.3607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,128,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,32768,0.1760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,2,0.1080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,4,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,4096,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,16,0.1085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,16384,0.1445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,64,0.1088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,32,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,128,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,2048,0.2534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,512,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,1024,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,256,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,32768,0.1970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,8,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,65536,0.2701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,8192,0.1516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,16384,0.1612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,2,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,4,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,4096,0.1359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,8,0.0999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,16,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,32,0.0978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,128,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,131072,0.3975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,1024,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,256,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,2048,0.2551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,64,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,4096,0.1438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,512,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,65536,0.2843
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,16384,0.1645
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,131072,0.4343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,8,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,16,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,32768,0.2012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,4,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,32,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,8192,0.1518
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,256,0.1069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,512,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,128,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,16384,0.1952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,64,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,2048,0.2750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,8192,0.1737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,131072,0.5344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,2,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,2,0.1007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,65536,0.3442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,1024,0.1306
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,32768,0.2415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,8,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,128,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,64,0.1105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,4,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,512,0.1377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,256,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,4096,0.1656
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,16,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,2048,0.3179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,4096,0.2081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,8192,0.2238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,32,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,4,0.1494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,1024,0.1584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,16384,0.2576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,65536,0.4677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,32768,0.3246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,2,0.1477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,64,0.1559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,131072,0.7369
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,32,0.1515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,2048,0.5271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,4096,0.3122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,16,0.1502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,8,0.1543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,512,0.1938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,1024,0.2316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,16384,0.3995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,8192,0.3426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,128,0.1715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,2,0.2406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,8,0.2316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,16,0.2424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,32768,0.5153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,4,0.2304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,65536,0.7528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,131072,1.2998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,64,0.2364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,512,0.3124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,2048,0.9434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,8192,0.5982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,256,0.1799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,128,0.2628
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,32,0.2427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,16384,0.6973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,4,0.4003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,256,0.2702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,65536,1.4532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,32768,0.9423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,2,0.3989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,4096,0.5381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,1024,0.3611
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,8,0.3792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,256,0.4558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,64,0.3813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,32,0.3777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,16,0.4002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,512,0.4982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,128,0.4286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,16384,1.2409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,1024,0.6144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,4096,0.9454
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,8192,1.0299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,32768,1.6967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,32,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,256,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,512,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,2048,1.6350
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,8,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,4,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,16,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,1024,0.0982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,8192,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,2048,0.2286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,64,0.0913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,4096,0.1104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,2,0.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,16384,0.1241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,128,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,2,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,65536,0.2056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,8,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,128,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,131072,0.3139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,32,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,64,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,4,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,2048,0.1109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,4096,0.1190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,16,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,256,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,16384,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,131072,0.3290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,512,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,8192,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,1024,0.1060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,2,0.1044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,4,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,65536,0.2204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,16,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,128,0.1081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,256,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,8,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,1024,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,2048,0.2395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,64,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,4096,0.1204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,16384,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,32,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,512,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,32768,0.1658
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,65536,0.2257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,32768,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,2,0.1041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,131072,0.3402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,32768,0.1527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,4,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,16,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,8192,0.1296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,64,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,8,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,128,0.1018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,256,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,32,0.1017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,4096,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,1024,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,8192,0.1272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,131072,0.3604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,65536,0.2428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,512,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,32768,0.1723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,8,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,2,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,2048,0.2403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,16384,0.1368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,32,0.1044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,64,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,4,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,128,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,256,0.1065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,16,0.1040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,1024,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,8192,0.1397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,32768,0.1895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,4096,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,16384,0.1608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,2,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,65536,0.2644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,32,0.0937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,64,0.0941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,512,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,4,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,1024,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,8,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,131072,0.3930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,256,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,128,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,512,0.0999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,2048,0.2447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,2048,0.2503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,16,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,8192,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,32768,0.1993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,65536,0.2813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,131072,0.4308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,16,0.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,32,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,8,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,64,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,128,0.0997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,256,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,4,0.0960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,512,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,1024,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,16384,0.1617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,4096,0.1391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,2,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,8192,0.1704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,2,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,65536,0.3408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,4096,0.1622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,32768,0.2390
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,2048,0.2738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,131072,0.5301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,4,0.1085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,16384,0.1913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,256,0.1252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,64,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,128,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,1024,0.1552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,2048,0.3210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,512,0.1383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,8,0.1107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,32768,0.3253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,8192,0.2250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,131072,0.7386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,32,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,16,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,16384,0.2535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,16,0.1474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,4,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,4096,0.2091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,32,0.1502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,65536,0.4625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,256,0.1767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,8,0.1492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,128,0.1630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,64,0.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,512,0.1948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,2,0.1504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,8192,0.3435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,2048,0.5214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,16384,0.3986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,65536,0.7502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,1024,0.2300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,2,0.2357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,4096,0.3135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,64,0.2434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,32,0.2432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,32768,0.5139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,16,0.2377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,512,0.3102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,8,0.2271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,1024,0.3558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,2048,0.9519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,4096,0.5433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,4,0.2258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,256,0.2775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,16384,0.6959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,128,0.2504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,8,0.3748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,131072,1.3013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,65536,1.4575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,2,0.3962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,16,0.3751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,32768,0.9366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,128,0.4236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,64,0.3806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,4,0.3751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,512,0.4906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,32,0.3733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,1024,0.6221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,4096,0.9213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,16384,1.2318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,2,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,4,0.0916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,32768,1.6685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,32,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,64,0.0921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,128,0.0935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,512,0.0933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,256,0.4537
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,1024,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,2048,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,4096,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,8192,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,16384,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,32768,0.1475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,65536,0.2048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,131072,0.3063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,4,0.1005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,8,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,16,0.1008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,32,0.1001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,8192,0.5858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,16,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,2048,1.6499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,8192,1.0200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,64,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,512,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,256,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,256,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,128,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,2048,0.2360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,8,0.0945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,8192,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,4096,0.1213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,1024,0.1021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,2,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,131072,0.3244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,4,0.1026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,32768,0.1632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,2,0.1008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,16,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,65536,0.2201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,16384,0.1328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,256,0.1024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,512,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,2048,0.2384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,128,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,1024,0.1056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,8192,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,4096,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,32768,0.1625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,16384,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,131072,0.3428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,2,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,4,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,8,0.1002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,16,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,64,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,128,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,256,0.1019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,512,0.1057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,1024,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,2048,0.2363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,16384,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,32,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,32,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,4096,0.1210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,8,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,64,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,65536,0.2432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,131072,0.3565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,8192,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,2,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,32768,0.1681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,32,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,4,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,128,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,16,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,256,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,1024,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,2048,0.2452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,65536,0.2202
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,4096,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,8192,0.1354
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,16384,0.1506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,32768,0.1920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,64,0.1018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,65536,0.2604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,512,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,4,0.0915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,16,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,8,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,2,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,131072,0.3924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,256,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,64,0.0917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,1024,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,128,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,512,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,8192,0.1459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,2048,0.2480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,32768,0.1954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,4096,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,65536,0.2794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,8,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,2,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,8,0.0958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,131072,0.4272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,16,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,16384,0.1608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,64,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,128,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,256,0.1017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,1024,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,512,0.1079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,4,0.0955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,32,0.0965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,8192,0.1699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,2048,0.2708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,65536,0.3400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,16384,0.1907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,2,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,4,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,4096,0.1607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,16,0.1064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,32768,0.2367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,32,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,64,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,128,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,8,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,512,0.1371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,4096,0.2068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,2048,0.3134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,8192,0.2227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,1024,0.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,256,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,32768,0.3202
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,32,0.0923
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,131072,0.5282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,4,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,65536,0.4598
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,8,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,16,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,64,0.1509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,128,0.1591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,512,0.1919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,131072,0.7340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,1024,0.2271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,2048,0.5186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,32,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,16384,0.2538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,8192,0.3411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,2,0.1467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,32768,0.5134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,2,0.2302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,65536,0.7398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,131072,1.2981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,16384,0.3985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,4,0.2319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,8,0.2199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,16,0.2310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,32,0.2254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,64,0.2286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,512,0.2955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,256,0.2735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,128,0.2549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,4096,0.5285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,2048,0.9407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,1024,0.3634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,32768,0.9335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,65536,1.4275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,16384,0.6997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,256,0.1753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,16,0.3667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,2,0.3676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,8,0.3875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,4,0.3865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,128,0.3955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,32,0.3663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,8192,0.5775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,512,0.4996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,64,0.3881
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,256,0.4234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,1024,0.5963
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,8192,1.0084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,2048,1.6435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,2,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,4,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,8,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,32768,1.6780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,4096,0.3101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,16,0.0890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,64,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,128,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,32,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,16384,1.2471
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,4096,0.9100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,4096,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,2048,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,8192,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,1024,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,32768,0.1486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,131072,0.3139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,256,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,512,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,65536,0.2047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,8,0.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,16,0.1065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,64,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,128,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,256,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,512,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,2,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,32,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,16384,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,4,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,2048,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,8192,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,65536,0.2189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,32768,0.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,1024,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,2,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,8,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,16,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,16384,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,32,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,131072,0.3184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,4096,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,256,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,128,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,512,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,2048,0.2320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,1024,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,4096,0.1167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,16384,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,8192,0.1225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,64,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,4,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,2,0.0983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,32768,0.1603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,65536,0.2190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,32,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,64,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,8,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,16,0.0976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,256,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,512,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,1024,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,128,0.1003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,2048,0.2368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,4096,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,32768,0.1661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,16384,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,65536,0.2383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,2,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,131072,0.3602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,8192,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,4,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,16,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,8,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,128,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,64,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,256,0.1022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,1024,0.1081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,512,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,32,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,4096,0.1304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,8192,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,16384,0.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,32768,0.1870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,65536,0.2582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,2,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,131072,0.3903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,2048,0.2398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,4,0.1028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,8,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,16,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,32,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,256,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,128,0.0914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,64,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,512,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,4096,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,1024,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,2048,0.2455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,8192,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,32768,0.1957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,16384,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,131072,0.4271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,4,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,65536,0.2779
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,2,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,16,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,32,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,64,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,256,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,512,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,128,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,8,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,2048,0.2701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,131072,0.3352
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,16384,0.1896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,8192,0.1690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,32768,0.2369
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,1024,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,4096,0.1585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,131072,0.5259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,2,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,4,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,16,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,8,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,65536,0.3387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,32,0.1103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,64,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,4,0.0926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,128,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,1024,0.1523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,512,0.1363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,4096,0.2074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,16384,0.2506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,256,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,65536,0.4614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,32768,0.3184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,131072,0.7357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,2,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,8,0.1458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,4,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,2048,0.3160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,8192,0.2220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,64,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,32,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,128,0.1575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,1024,0.2232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,16,0.1405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,256,0.1765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,4096,0.3075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,8192,0.3386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,16384,0.3934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,32768,0.5135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,2048,0.5197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,512,0.1876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,65536,0.7405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,8,0.2195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,4,0.2270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,16,0.2304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,64,0.2244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,32,0.2217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,2,0.2179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,128,0.2428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,4096,0.5268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,2048,0.9355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,131072,1.3023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,512,0.3023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,1024,0.3599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,8192,0.5754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,16384,0.6875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,256,0.2612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,32768,0.9259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,8,0.3825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,4,0.3832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,16,0.3607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,32,0.3629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,2,0.3838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,128,0.3903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,1024,0.5858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,256,0.4380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,65536,1.4153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,64,0.3692
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,2048,1.6199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,4096,0.9241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,8192,1.0024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,16384,1.2155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,4,0.1676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,8,0.1690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,32768,1.6740
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,512,0.4957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,32,0.1717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,16,0.1628
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,2,0.1724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,64,0.1723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,256,0.1718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,512,0.1799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,4096,0.1815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,128,0.1695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,32768,0.2318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,16384,0.2020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,1024,0.1821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,2048,0.1799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,2,0.1823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,65536,0.2947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,4,0.1787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,16,0.1846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,64,0.1769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,131072,0.3904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,8192,0.2003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,512,0.1832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,256,0.1809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,8,0.1762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,2048,0.1818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,1024,0.1891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,16384,0.2175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,8192,0.2099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,32,0.1790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,65536,0.2987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,2,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,4096,0.2064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,32768,0.2386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,8,0.1783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,32,0.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,4,0.1824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,16,0.1726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,256,0.1801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,128,0.1858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,512,0.1853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,1024,0.1891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,64,0.1886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,4096,0.2024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,131072,0.4013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,8192,0.2023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,2048,0.1900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,32768,0.2473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,65536,0.3147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,16384,0.2238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,8,0.1480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,4,0.1464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,131072,0.4064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,2,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,16,0.1527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,32,0.1497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,128,0.1491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,512,0.1555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,1024,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,4096,0.1755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,8192,0.1787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,64,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,16384,0.1894
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,65536,0.2810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,131072,0.3960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,2048,0.2893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,2,0.1522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,32768,0.2161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,4,0.1510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,16,0.1507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,8,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,64,0.1509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,128,0.1530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,512,0.1579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,1024,0.1632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,256,0.1545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,2048,0.2918
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,8192,0.1850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,4096,0.1805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,16384,0.1982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,32768,0.2246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,131072,0.4262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,65536,0.2972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,4,0.1451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,2,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,8,0.1434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,16,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,32,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,64,0.1439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,256,0.1511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,128,0.1820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,512,0.1521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,1024,0.1560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,128,0.1502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,2048,0.2887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,4096,0.1753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,8192,0.1825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,16384,0.1965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,131072,0.4643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,32768,0.2337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,4,0.1523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,2,0.1525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,8,0.1539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,256,0.1515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,16,0.1565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,32,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,128,0.1545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,32,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,64,0.1525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,512,0.1567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,1024,0.1619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,2048,0.2989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,4096,0.1880
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,8192,0.1976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,256,0.1554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,32768,0.2633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,16384,0.2175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,4,0.1809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,65536,0.3673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,8,0.1827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,131072,0.5572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,32,0.1807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,128,0.1829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,16,0.1807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,256,0.1869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,1024,0.1962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,2,0.1826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,512,0.1891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,64,0.1794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,8192,0.2531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,4096,0.2350
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,2048,0.3442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,65536,0.4930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,2,0.2506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,131072,0.7639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,4,0.2453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,8,0.2454
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,16,0.2493
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,16384,0.2821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,64,0.2453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,256,0.2546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,128,0.2493
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,32768,0.3510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,1024,0.2812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,2048,0.5599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,8192,0.3728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,32,0.2501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,4096,0.3428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,16384,0.4367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,512,0.2671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,4,0.3648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,65536,0.7803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,131072,1.3401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,16,0.3742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,32768,0.5490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,2,0.3741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,8,0.3752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,32,0.3607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,128,0.3735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,256,0.3913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,512,0.3998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,2048,0.9745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,64,0.3684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,1024,0.4404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,16384,0.7262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,8192,0.6073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,4096,0.5520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,65536,1.4839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,65536,0.3169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,32768,0.9472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,2,0.6665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,8,0.6828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,16,0.6630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,32,0.6670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,4,0.6830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,512,0.7296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,64,0.6883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,1024,0.8149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,4096,1.0106
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,2048,1.7327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,128,0.6806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,256,0.6980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,16384,1.3294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,2,0.1346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,4,0.1383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,16,0.1322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,32768,1.7821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,8,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,8192,1.1350
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,32,0.1388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,128,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,64,0.1342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,512,0.1364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,256,0.1409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,2048,0.1478
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,32768,0.1937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,8192,0.1653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,16384,0.1674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,131072,0.3640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,65536,0.2520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,2,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,4096,0.1623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,4,0.1488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,16,0.1424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,64,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,1024,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,512,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,128,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,32,0.1473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,256,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,1024,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,2048,0.2782
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,4096,0.1652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,16384,0.1792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,8192,0.1686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,32768,0.2103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,8,0.1460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,65536,0.2711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,16,0.1470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,8,0.1465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,64,0.1497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,2,0.1459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,128,0.1510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,4,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,512,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,2048,0.2808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,1024,0.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,32,0.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,8192,0.1754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,256,0.1460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,32768,0.2090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,16384,0.1840
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,4096,0.1729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,4,0.1397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,131072,0.3814
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,2,0.1407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,65536,0.2732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,64,0.1453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,128,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,32,0.1449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,256,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,512,0.1490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,8,0.1461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,131072,0.3658
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,4096,0.1685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,1024,0.1500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,8192,0.1689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,16384,0.1857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,65536,0.2684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,2048,0.2821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,16,0.1456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,8,0.1330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,2,0.1301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,16,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,32,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,64,0.1312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,256,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,131072,0.3922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,512,0.1356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,4,0.1301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,4096,0.1607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,128,0.1344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,8192,0.1706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,16384,0.1783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,1024,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,131072,0.4121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,32768,0.2143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,32768,0.2090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,65536,0.2784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,2,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,8,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,4,0.1202
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,64,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,16,0.1208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,256,0.1234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,128,0.1206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,1024,0.1325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,4096,0.1616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,8192,0.1685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,16384,0.1826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,512,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,2048,0.2742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,32768,0.2189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,2,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,65536,0.3013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,32,0.1224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,131072,0.4504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,32,0.1272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,16,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,64,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,2048,0.2751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,8,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,128,0.1322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,256,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,512,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,2048,0.3062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,4,0.1275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,16384,0.2261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,8192,0.2063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,32768,0.2706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,65536,0.3738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,1024,0.1544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,4096,0.1977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,64,0.1520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,128,0.1612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,256,0.1660
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,512,0.1789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,1024,0.2015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,2048,0.3767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,4096,0.2673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,8192,0.2858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,16384,0.3146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,32768,0.3820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,65536,0.5250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,131072,0.7968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,2,0.2091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,8,0.2089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,131072,0.5641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,4,0.1504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,8,0.1504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,16,0.1491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,32,0.1505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,2,0.1498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,16,0.2089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,32,0.2054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,512,0.2536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,4,0.2093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,1024,0.2891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,128,0.2200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,4096,0.3999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,8192,0.4337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,16384,0.4916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,65536,0.8331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,32768,0.6033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,64,0.2056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,2,0.3166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,131072,1.3930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,2048,0.6185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,4,0.3191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,8,0.3087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,16,0.3062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,128,0.3374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,64,0.3171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,1024,0.4953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,256,0.3631
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,512,0.3981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,32,0.3058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,2048,1.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,4096,0.7012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,32768,1.1068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,8192,0.7629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,2,0.5167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,65536,1.6074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,4,0.5176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,8,0.5144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,16384,0.8753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,16,0.5369
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,128,0.5817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,512,0.6951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,256,0.6118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,64,0.5181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,1024,0.8954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,2048,2.0203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,4096,1.2794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,32,0.5338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,16384,1.5966
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,256,0.2304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,8192,1.3818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,4,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,2,0.1121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,32768,2.0300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,32,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,64,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,16,0.1164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,256,0.1141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,128,0.1166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,2048,0.2491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,4096,0.1293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,512,0.1146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,1024,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,65536,0.2324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,8192,0.1414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,8,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,2,0.1232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,16384,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,32768,0.1739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,8,0.1265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,131072,0.3305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,32,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,256,0.1263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,64,0.1232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,128,0.1269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,1024,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,2048,0.2630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,4,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,4096,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,8192,0.1492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,65536,0.2508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,131072,0.3471
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,32768,0.1871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,2,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,16384,0.1603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,8,0.1275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,4,0.1222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,16,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,32,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,128,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,16,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,256,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,512,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,1024,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,4096,0.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,2048,0.2613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,64,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,8192,0.1531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,32768,0.1864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,16384,0.1629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,131072,0.3626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,512,0.1295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,2,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,32,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,64,0.1201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,4,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,16,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,8,0.1221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,1024,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,128,0.1243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,256,0.1244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,2048,0.2589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,4096,0.1438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,16384,0.1655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,32768,0.1926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,65536,0.2592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,8192,0.1548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,512,0.1268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,131072,0.3731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,8,0.1260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,32,0.1267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,16,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,64,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,4,0.1260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,256,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,2,0.1221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,128,0.1230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,4096,0.1522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,1024,0.1308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,2048,0.2651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,65536,0.2518
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,512,0.1300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,8192,0.1606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,32768,0.1988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,65536,0.2732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,131072,0.4007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,2,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,16384,0.1686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,64,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,32,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,4,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,128,0.1117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,256,0.1121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,16,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,2048,0.2557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,4096,0.1430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,512,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,16384,0.1667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,32768,0.2012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,1024,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,131072,0.4319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,65536,0.2842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,8,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,8,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,8192,0.1530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,2,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,16,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,256,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,128,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,1024,0.1283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,512,0.1220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,4096,0.1583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,8192,0.1691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,2048,0.2698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,32768,0.2381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,16384,0.1905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,65536,0.3395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,131072,0.5284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,2,0.1288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,4,0.1301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,16,0.1273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,8,0.1295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,32,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,64,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,256,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,512,0.1474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,128,0.1365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,1024,0.1585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,4096,0.2038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,2048,0.3163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,8192,0.2233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,16384,0.2526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,65536,0.4602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,32768,0.3188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,2,0.1788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,131072,0.7335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,4,0.1751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,8,0.1746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,16,0.1732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,32,0.1788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,64,0.1786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,128,0.1846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,256,0.1893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,512,0.2001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,1024,0.2312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,2048,0.5203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,4096,0.3057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,8192,0.3320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,16384,0.3895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,32768,0.5125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,2,0.2593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,65536,0.7434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,4,0.2608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,8,0.2693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,131072,1.2980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,16,0.2590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,4,0.1109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,32,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,64,0.2592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,256,0.2846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,128,0.2868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,64,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,32,0.2710
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,4096,0.5031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,1024,0.3577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,16384,0.6751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,8192,0.5546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,32768,0.9048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,2048,0.9141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,2,0.4285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,8,0.4483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,65536,1.4274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,16,0.4258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,4,0.4299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,32,0.4274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,64,0.4474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,512,0.5257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,1024,0.6409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,2048,1.6072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,4096,0.8854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,128,0.4616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,8192,1.0102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,4,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,16384,1.2027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,32768,1.6517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,8,0.1098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,2,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,16,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,32,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,64,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,256,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,512,0.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,128,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,1024,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,2048,0.2362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,256,0.4994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,512,0.3099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,16384,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,8192,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,65536,0.2200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,32768,0.1667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,4096,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,2,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,8,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,4,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,32,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,64,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,256,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,512,0.1147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,131072,0.3191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,128,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,8192,0.1321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,4096,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,1024,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,32768,0.1699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,65536,0.2344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,16384,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,2,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,131072,0.3325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,4,0.1145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,32,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,64,0.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,16,0.1178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,128,0.1137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,512,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,256,0.1158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,1024,0.1179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,8192,0.1398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,4096,0.1363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,32768,0.1757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,65536,0.2425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,8,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,2048,0.2465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,2,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,4,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,16384,0.1477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,16,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,32,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,2048,0.2449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,512,0.1099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,1024,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,64,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,256,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,4096,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,2048,0.2466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,8192,0.1364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,32768,0.1797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,16384,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,128,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,65536,0.2461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,131072,0.3632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,4,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,16,0.1150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,32,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,8,0.1128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,64,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,256,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,128,0.1141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,512,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,1024,0.1181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,131072,0.3555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,2048,0.2507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,8192,0.1480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,16384,0.1596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,2,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,32768,0.1930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,4096,0.1403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,4,0.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,65536,0.2648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,131072,0.3911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,16,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,8,0.0997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,64,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,8,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,128,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,256,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,512,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,4096,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,8192,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,1024,0.1082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,32,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,16384,0.1571
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,32768,0.1952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,131072,0.4237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,4,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,2048,0.2441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,8,0.1005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,2,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,32,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,65536,0.2759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,64,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,128,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,256,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,512,0.1125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,16,0.1017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,8192,0.1568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,1024,0.1183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,2048,0.2583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,4096,0.1467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,2,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,131072,0.5145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,32768,0.2255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,2,0.1128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,65536,0.3271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,16384,0.1781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,4,0.1127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,8,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,128,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,256,0.1223
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,512,0.1323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,64,0.1167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,16,0.1142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,1024,0.1395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,32,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,16384,0.2240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,2048,0.2842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,131072,0.7023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,32768,0.2901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,8192,0.1921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,65536,0.4331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,4096,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,4,0.1567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,64,0.1566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,32,0.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,8,0.1535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,256,0.1735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,2,0.1515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,128,0.1616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,1024,0.1924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,2048,0.4654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,16,0.1540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,512,0.1829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,4096,0.2522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,16384,0.3384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,32768,0.4567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,2,0.2320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,131072,1.2455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,8192,0.2854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,32,0.2316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,65536,0.6871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,64,0.2348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,8,0.2321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,128,0.2568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,16,0.2339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,4,0.2433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,1024,0.3012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,2048,0.8324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,512,0.2695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,4096,0.4218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,256,0.2545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,32768,0.8227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,2,0.3840
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,8,0.3816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,4,0.3826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,65536,1.3377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,16384,0.5823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,32,0.4022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,64,0.3834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,256,0.4157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,8192,0.4826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,2048,1.4490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,1024,0.5073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,128,0.3990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,16,0.3853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,8192,0.8184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,16384,1.0487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,4,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,512,0.4447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,32768,1.4608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,16,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,8,0.1042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,2,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,4096,0.7130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,1024,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,64,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,32,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,2048,0.2263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,256,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,4096,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,512,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,16384,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,32768,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,65536,0.2114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,2,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,8192,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,131072,0.3176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,16,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,32,0.1083
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,128,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,8,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,128,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,256,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,4,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,2048,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,4096,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,16384,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,64,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,65536,0.2229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,8192,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,2,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,512,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,131072,0.3364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,4,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,16,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,8,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,128,0.1143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,256,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,32,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,512,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,1024,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,4096,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,2048,0.2462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,8192,0.1341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,32768,0.1655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,32768,0.1692
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,16384,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,65536,0.2331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,64,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,1024,0.1128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,8,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,4,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,32,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,128,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,16,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,256,0.1065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,1024,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,131072,0.3493
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,64,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,512,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,8192,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,32768,0.1768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,2,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,65536,0.2404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,131072,0.3570
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,2,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,16384,0.1437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,8,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,16,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,2048,0.2385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,64,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,128,0.1102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,32,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,4096,0.1261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,1024,0.1125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,512,0.1098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,2048,0.2444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,256,0.1120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,4,0.1120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,16384,0.1543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,8192,0.1392
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,65536,0.2576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,2,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,32768,0.1869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,4096,0.1324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,131072,0.3874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,4,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,16,0.0986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,32,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,8,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,512,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,1024,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,256,0.0979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,128,0.0974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,8192,0.1354
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,16384,0.1510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,2048,0.2383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,65536,0.2727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,131072,0.4199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,4,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,8,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,2,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,16,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,32,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,64,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,256,0.1019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,512,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,32768,0.1882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,128,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,4096,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,2048,0.2546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,1024,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,8192,0.1539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,64,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,16384,0.1724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,32768,0.2239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,4,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,2,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,4096,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,16,0.1084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,65536,0.3226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,128,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,8,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,131072,0.5116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,512,0.1234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,1024,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,256,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,4096,0.1748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,32,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,16384,0.2184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,8192,0.1887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,2048,0.2830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,4,0.1426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,131072,0.7009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,64,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,8,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,32768,0.2876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,65536,0.4290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,2,0.1461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,256,0.1627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,32,0.1425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,16,0.1446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,64,0.1506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,512,0.1775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,8192,0.2746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,4096,0.2468
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,128,0.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,2048,0.4609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,16384,0.3325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,1024,0.1881
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,65536,0.6864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,32768,0.4552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,2,0.2224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,8,0.2234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,4,0.2203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,16,0.2218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,64,0.2363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,131072,1.2368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,128,0.2465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,1024,0.2924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,512,0.2633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,2048,0.8218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,256,0.2457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,32,0.2221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,8192,0.4646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,4096,0.4209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,4,0.3633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,32768,0.8028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,65536,1.3313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,16384,0.5833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,16,0.3635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,32,0.3825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,8,0.3641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,2,0.3631
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,128,0.3822
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,512,0.4263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,64,0.3626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,4096,0.6932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,1024,0.5058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,8192,0.7951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,256,0.3980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,2,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,4,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,16384,1.0092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,32,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,64,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,32768,1.4401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,2048,1.4297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,128,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,256,0.0953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,1024,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,512,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,2048,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,4096,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,16,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,16384,0.1248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,65536,0.2095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,32768,0.1477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,2,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,8,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,8,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,32,0.1020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,64,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,131072,0.3122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,256,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,8192,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,1024,0.1028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,512,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,16,0.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,2048,0.2405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,16384,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,4096,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,4,0.1076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,65536,0.2177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,128,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,4,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,131072,0.3264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,32768,0.1584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,16,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,2,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,8192,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,8,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,128,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,256,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,2048,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,32,0.1060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,4096,0.1241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,16384,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,32768,0.1668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,8192,0.1300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,65536,0.2273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,2,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,131072,0.3424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,8,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,4,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,16,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,64,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,32,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,128,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,1024,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,512,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,1024,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,256,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,64,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,512,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,2048,0.2341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,16384,0.1379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,4096,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,131072,0.3532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,4,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,32768,0.1692
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,65536,0.2387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,16,0.1061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,64,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,8,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,32,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,2,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,1024,0.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,128,0.1060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,256,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,2048,0.2406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,4096,0.1250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,512,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,8192,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,131072,0.3847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,65536,0.2554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,8192,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,32768,0.1830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,2,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,16384,0.1451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,64,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,32,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,4,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,512,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,256,0.0943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,2048,0.2320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,1024,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,128,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,8,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,32768,0.1841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,16384,0.1485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,65536,0.2689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,131072,0.4156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,2,0.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,8192,0.1306
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,16,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,8,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,4,0.0958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,4096,0.1219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,64,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,128,0.0956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,16,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,512,0.1025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,32,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,8192,0.1484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,256,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,16384,0.1698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,2048,0.2494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,4096,0.1408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,131072,0.5056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,2,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,8,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,32768,0.2172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,4,0.1076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,32,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,16,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,1024,0.1076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,256,0.1159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,1024,0.1349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,128,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,65536,0.3200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,512,0.1221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,2048,0.2833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,4096,0.1715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,16384,0.2177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,64,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,65536,0.4284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,32768,0.2839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,8192,0.1912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,8,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,131072,0.7014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,4,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,16,0.1399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,64,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,32,0.1436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,2,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,1024,0.1868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,256,0.1592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,4096,0.2431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,128,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,32768,0.4476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,16384,0.3342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,2048,0.4582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,512,0.1722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,8192,0.2727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,65536,0.6786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,8,0.2257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,131072,1.2333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,2,0.2257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,32,0.2206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,4,0.2149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,64,0.2219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,256,0.2429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,16,0.2183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,2048,0.8203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,1024,0.3006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,4096,0.4029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,512,0.2581
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,8192,0.4573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,32768,0.7995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,16384,0.5680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,2,0.3780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,16,0.3573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,8,0.3581
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,128,0.2421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,65536,1.3270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,32,0.3597
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,4,0.3577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,128,0.3753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,64,0.3772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,1024,0.4990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,512,0.4181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,8192,0.7867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,4096,0.6843
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,2048,1.4045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,256,0.3901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,2,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,4,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,16384,1.0013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,32,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,8,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,128,0.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,32768,1.4533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,256,0.0961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,64,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,16,0.0953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,512,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,4096,0.1140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,1024,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,16384,0.1219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,8192,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,65536,0.2096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,131072,0.3035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,32768,0.1465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,2,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,8,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,2048,0.2289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,16,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,64,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,128,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,512,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,32,0.1022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,1024,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,256,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,2048,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,16384,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,65536,0.2203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,8192,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,4,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,131072,0.3288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,2,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,32768,0.1580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,4,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,32,0.1017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,8,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,128,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,4096,0.1181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,64,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,512,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,2048,0.2381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,1024,0.1058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,256,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,16384,0.1377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,32768,0.1611
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,8192,0.1219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,65536,0.2258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,16,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,4,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,4096,0.1212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,32,0.1021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,16,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,8,0.1003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,64,0.0977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,256,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,1024,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,131072,0.3364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,2048,0.2331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,4096,0.1141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,128,0.1023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,32768,0.1624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,512,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,2,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,131072,0.3499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,8192,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,4,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,8,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,16,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,32,0.1021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,65536,0.2346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,128,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,2,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,64,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,256,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,512,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,2048,0.2380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,8192,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,1024,0.1042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,16384,0.1324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,32768,0.1809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,4096,0.1245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,16384,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,2,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,65536,0.2544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,4,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,32,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,16,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,128,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,512,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,64,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,2048,0.2299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,131072,0.3796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,256,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,8,0.0918
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,1024,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,8192,0.1303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,4096,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,32768,0.1823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,2,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,4,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,131072,0.4149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,8,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,32,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,64,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,65536,0.2652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,128,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,1024,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,16,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,256,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,4096,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,16384,0.1688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,8192,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,32768,0.2147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,16384,0.1459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,65536,0.3187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,2,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,4,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,131072,0.5067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,8,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,512,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,16,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,2048,0.2491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,64,0.1040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,256,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,2048,0.2767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,128,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,1024,0.1347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,8192,0.1865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,4096,0.1691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,65536,0.4252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,512,0.1183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,16384,0.2173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,2,0.1367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,131072,0.6984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,32768,0.2824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,8,0.1376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,4,0.1366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,32,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,16,0.1395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,128,0.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,256,0.1544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,1024,0.1832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,512,0.1682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,4096,0.2427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,64,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,8192,0.2752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,16384,0.3265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,32,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,32768,0.4483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,2,0.2107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,2048,0.4601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,65536,0.6746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,16,0.2153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,32,0.2129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,64,0.2169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,4,0.2209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,8,0.2097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,512,0.2539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,256,0.2370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,1024,0.2872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,2048,0.8149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,8192,0.4510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,16384,0.5641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,4096,0.4109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,128,0.2395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,2,0.3497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,131072,1.2352
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,4,0.3492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,16,0.3689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,32,0.3688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,65536,1.3227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,8,0.3474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,64,0.3498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,128,0.3685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,32768,0.7928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,512,0.4294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,2048,1.4118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,256,0.3817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,4096,0.6740
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,1024,0.4679
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,8192,0.7795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,8,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,4,0.0890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,2,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,32768,1.4236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,32,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,64,0.0926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,16,0.0891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,256,0.0913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,512,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,1024,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,4096,0.1081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,8192,0.1099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,32768,0.1459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,2048,0.0999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,16384,0.1199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,128,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,65536,0.2114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,16384,0.9915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,4,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,16,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,131072,0.3067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,32,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,2,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,8,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,128,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,64,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,1024,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,4096,0.1160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,8192,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,16384,0.1346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,65536,0.2200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,256,0.0998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,32768,0.1594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,131072,0.3174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,2,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,4,0.1038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,16,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,64,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,2048,0.2314
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,128,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,256,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,512,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,512,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,8,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,4096,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,8192,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,16384,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,2048,0.2320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,32768,0.1569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,65536,0.2213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,1024,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,131072,0.3318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,4,0.0979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,8,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,64,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,32,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,128,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,32,0.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,1024,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,16,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,2048,0.2343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,2,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,256,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,4096,0.1159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,32768,0.1633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,512,0.1003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,8192,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,16384,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,8,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,16,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,131072,0.3571
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,32,0.1025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,65536,0.2355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,64,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,256,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,128,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,4,0.1001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,4096,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,1024,0.1057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,8192,0.1290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,512,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,16384,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,2,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,65536,0.2497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,2048,0.2371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,4,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,32768,0.1772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,16,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,131072,0.3796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,64,0.0900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,32,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,8,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,512,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,128,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,2,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,1024,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,2048,0.2277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,256,0.0912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,8192,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,32768,0.1805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,16384,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,131072,0.4113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,4096,0.1143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,2,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,8,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,65536,0.2648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,16,0.0934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,128,0.0938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,4,0.0933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,256,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,512,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,2048,0.2486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,64,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,4096,0.1355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,32,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,1024,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,131072,0.5041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,32768,0.2148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,65536,0.3160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,2,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,16384,0.1663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,8192,0.1470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,16,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,32,0.1056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,256,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,64,0.1040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,512,0.1191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,8,0.1046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,128,0.1066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,1024,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,2048,0.2788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,4,0.1064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,16384,0.2161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,32768,0.2814
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,4096,0.1686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,8192,0.1855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,4,0.1383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,131072,0.6945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,2,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,8,0.1387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,65536,0.4251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,32,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,256,0.1522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,64,0.1367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,512,0.1682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,16,0.1362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,4096,0.2400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,1024,0.1849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,128,0.1459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,2048,0.4536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,65536,0.6736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,8192,0.2677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,4,0.2177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,2,0.2172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,16,0.2095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,131072,1.2318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,32,0.2076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,32768,0.4464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,16384,0.3254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,128,0.2348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,64,0.2230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,512,0.2518
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,8,0.2074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,2048,0.8087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,8192,0.4601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,1024,0.2816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,256,0.2348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,4096,0.4011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,2,0.3431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,8,0.3449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,32768,0.7909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,65536,1.3112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,32,0.3453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,64,0.3440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,4,0.3650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,16,0.3459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,128,0.3630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,256,0.3777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,512,0.4046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,4096,0.6885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,16384,0.5701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,1024,0.4635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,2048,1.3842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,2,0.1331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,8,0.1309
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,8192,0.7731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,32,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,32768,1.4171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,16,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,4,0.1282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,64,0.1323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,256,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,128,0.1336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,2048,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,8192,0.1658
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,16384,0.9852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,32768,0.1935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,65536,0.2544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,16384,0.1710
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,512,0.1392
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,131072,0.3626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,4,0.1399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,1024,0.1458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,8,0.1416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,32,0.1384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,64,0.1418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,16,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,256,0.1474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,1024,0.1488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,2048,0.1725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,2,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,4096,0.1706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,8192,0.1786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,32768,0.2066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,65536,0.2647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,131072,0.3702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,2,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,4,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,8,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,4096,0.1574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,16,0.1432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,64,0.1433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,128,0.1431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,512,0.1484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,512,0.1552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,1024,0.1570
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,16384,0.1782
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,256,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,32,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,4096,0.1679
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,128,0.1470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,32768,0.2071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,16384,0.1855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,131072,0.3775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,2,0.1427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,65536,0.2659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,16,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,32,0.1434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,8192,0.1731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,2048,0.1724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,64,0.1418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,512,0.1569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,8,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,2048,0.1691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,256,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,4,0.1439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,8192,0.1789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,128,0.1496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,32768,0.2134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,4096,0.1725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,16384,0.1871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,4,0.1464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,2,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,131072,0.3971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,8,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,65536,0.2793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,128,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,32,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,256,0.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,16,0.1475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,2048,0.1746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,1024,0.1605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,1024,0.1585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,4096,0.1777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,32768,0.2259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,64,0.1445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,65536,0.2962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,16384,0.1956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,512,0.1586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,2,0.1506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,8,0.1520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,8192,0.1847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,32,0.1530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,4,0.1530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,256,0.1588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,128,0.1521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,131072,0.4280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,64,0.1516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,16,0.1525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,8192,0.1964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,512,0.1616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,16384,0.2117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,65536,0.3321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,131072,0.4804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,4096,0.1915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,1024,0.1653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,32768,0.2485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,4,0.1586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,2,0.1582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,32,0.1584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,8,0.1594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,16,0.1573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,256,0.1630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,64,0.1589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,2048,0.2104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,4096,0.2136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,1024,0.1792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,512,0.1694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,2048,0.1873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,65536,0.3931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,128,0.1626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,8192,0.2226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,2,0.1761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,32768,0.2920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,8,0.1750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,131072,0.5843
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,16,0.1755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,16384,0.2464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,128,0.1828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,256,0.1852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,64,0.1758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,1024,0.2184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,4096,0.2742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,2048,0.2635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,4,0.1753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,32,0.1747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,512,0.1991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,8192,0.2885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,4,0.2155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,131072,0.8013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,32768,0.3855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,8,0.2199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,65536,0.5297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,32,0.2155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,16384,0.3189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,64,0.2165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,2,0.2158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,1024,0.2923
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,512,0.2597
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,16,0.2146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,128,0.2278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,16384,0.4878
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,4096,0.3876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,8192,0.4181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,256,0.2390
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,65536,0.8215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,2048,0.3743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,32768,0.5985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,2,0.2902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,4,0.2902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,16,0.3018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,128,0.3228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,131072,1.3836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,32,0.2909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,8,0.2899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,512,0.3617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,64,0.2914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,2048,0.5908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,8192,0.6632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,1024,0.4462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,4096,0.6104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,32768,1.0041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,2,0.5079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,4,0.4907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,16384,0.7717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,8,0.5086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,64,0.5093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,65536,1.5048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,32,0.4878
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,256,0.5520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,512,0.6391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,16,0.4879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,2048,1.0408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,256,0.3264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,128,0.5323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,1024,0.7874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,4096,1.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,8192,1.1969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,8,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,4,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,32768,1.8426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,16,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,16384,1.4283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,2,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,128,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,256,0.1090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,32,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,1024,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,512,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,8192,0.1343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,32768,0.1679
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,16384,0.1426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,131072,0.3267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,65536,0.2286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,2,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,64,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,2048,0.1291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,32,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,4,0.1140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,128,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,256,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,64,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,512,0.1182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,2048,0.1428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,1024,0.1211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,16,0.1127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,8192,0.1445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,16384,0.1564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,65536,0.2402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,8,0.1144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,32768,0.1823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,131072,0.3460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,4,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,4096,0.1265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,8,0.1170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,2,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,4096,0.1418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,128,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,32,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,16,0.1171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,2048,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,512,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,4096,0.1450
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,8192,0.1521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,32768,0.1944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,16384,0.1685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,64,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,256,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,65536,0.2541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,8,0.1189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,16,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,4,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,64,0.1170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,128,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,2,0.1172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,512,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,1024,0.1274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,256,0.1186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,4096,0.1540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,8192,0.1609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,32,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,1024,0.1242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,131072,0.3625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,131072,0.3802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,32768,0.1972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,16384,0.1694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,2,0.1185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,16,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,2048,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,64,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,128,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,256,0.1281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,4,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,32,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,65536,0.2608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,8,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,8192,0.1762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,16384,0.1853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,512,0.1363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,2048,0.1646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,1024,0.1417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,131072,0.4186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,65536,0.2879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,4,0.1250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,32768,0.2164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,4096,0.1697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,16,0.1213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,8,0.1222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,32,0.1247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,64,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,1024,0.1516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,256,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,2048,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,128,0.1262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,8192,0.1975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,2,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,32768,0.2489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,4096,0.1920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,512,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,16384,0.2114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,65536,0.3312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,2,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,131072,0.4787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,64,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,128,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,16,0.1372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,4,0.1323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,256,0.1511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,512,0.1641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,8,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,8192,0.2647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,2048,0.2516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,32,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,32768,0.3326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,65536,0.4383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,16384,0.2878
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,4096,0.2560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,8,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,131072,0.6243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,1024,0.1897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,16,0.1551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,64,0.1555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,32,0.1542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,256,0.1789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,4,0.1543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,2,0.1557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,512,0.2035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,1024,0.2565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,8192,0.3793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,16384,0.4047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,128,0.1668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,2048,0.3552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,2,0.1911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,32768,0.4803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,4,0.1919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,4096,0.3582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,32,0.1907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,16,0.1931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,128,0.2138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,256,0.2343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,64,0.1953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,65536,0.6185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,4096,0.5664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,131072,0.8927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,512,0.2823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,2048,0.5488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,16384,0.6492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,8,0.1896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,2,0.2711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,65536,0.9978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,8192,0.5953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,4,0.2713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,1024,0.3745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,16,0.2713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,131072,1.5617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,32,0.2713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,8,0.2708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,64,0.2820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,32768,0.7681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,512,0.4491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,4096,1.0000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,1024,0.6343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,256,0.3609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,128,0.3132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,2048,0.9977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,8192,1.0759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,4,0.4439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,65536,1.8945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,8,0.4434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,32768,1.3946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,16384,1.1665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,32,0.4647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,2,0.4431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,256,0.6202
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,128,0.5267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,64,0.4459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,16,0.4630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,1024,1.1480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,8192,1.9914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,4096,1.8873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,16384,2.1765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,512,0.7931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,4,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,2,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,2048,1.8614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,32768,2.6380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,32,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,16,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,64,0.0957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,8,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,128,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,2048,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,512,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,16384,0.1275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,1024,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,32768,0.1523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,131072,0.3135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,256,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,8192,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,4,0.1040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,4096,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,16,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,2,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,65536,0.2139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,128,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,512,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,1024,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,2048,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,32,0.1041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,8192,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,16384,0.1375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,32768,0.1655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,4096,0.1234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,256,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,2,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,131072,0.3271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,65536,0.2258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,8,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,32,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,4,0.1081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,64,0.1038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,16,0.1047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,64,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,256,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,128,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,4096,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,512,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,8,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,16384,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,2048,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,65536,0.2317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,32768,0.1688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,2,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,4,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,131072,0.3460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,1024,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,8192,0.1341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,64,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,128,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,16,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,512,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,1024,0.1137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,32,0.1048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,8,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,256,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,4096,0.1332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,32768,0.1827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,131072,0.3652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,2,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,8192,0.1379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,65536,0.2485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,8,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,4,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,32,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,16,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,64,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,256,0.1117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,1024,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,16384,0.1543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,128,0.1090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,2048,0.1394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,16384,0.1674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,4096,0.1453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,2048,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,131072,0.3977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,32768,0.1971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,4,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,8,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,2,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,8192,0.1555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,65536,0.2672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,512,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,64,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,16,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,1024,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,128,0.1125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,256,0.1163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,2048,0.1604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,16384,0.1849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,32768,0.2226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,4096,0.1633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,8192,0.1696
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,131072,0.4539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,512,0.1201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,8,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,65536,0.3032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,4,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,16,0.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,32,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,2,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,32,0.1161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,256,0.1220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,1024,0.1506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,4096,0.1927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,128,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,64,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,2048,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,65536,0.3708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,8192,0.2017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,131072,0.5599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,16384,0.2222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,512,0.1324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,4,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,8,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,32,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,64,0.1291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,32768,0.2694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,128,0.1386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,256,0.1515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,1024,0.1948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,4096,0.2677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,8192,0.2818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,2048,0.2554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,16384,0.3095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,512,0.1671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,2,0.1289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,131072,0.7920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,65536,0.5229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,32768,0.3785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,16,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,16,0.1598
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,2,0.1594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,32,0.1599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,8,0.1646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,512,0.2142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,128,0.1746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,64,0.1663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,4096,0.3934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,256,0.1913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,8192,0.4173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,4,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,1024,0.2640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,2,0.2189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,65536,0.8312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,2048,0.3729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,16384,0.4758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,8,0.2184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,131072,1.3839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,4,0.2181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,64,0.2197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,16,0.2193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,256,0.2669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,512,0.3240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,2048,0.6184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,32768,0.5990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,32,0.2290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,128,0.2419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,1024,0.4145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,4096,0.6514
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,16384,0.8153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,8192,0.6991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,32768,1.0380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,16,0.3406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,4,0.3415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,2,0.3613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,8,0.3411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,128,0.3869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,512,0.5409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,64,0.3420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,1024,0.7522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,4096,1.1680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,2048,1.1121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,8192,1.2727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,256,0.4360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,16384,1.4791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,4,0.0913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,65536,1.5538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,32,0.3463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,2,0.0890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,8,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,32768,1.9390
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,16,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,64,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,256,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,32,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,2048,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,8192,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,16384,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,4096,0.1079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,128,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,512,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,2,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,1024,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,131072,0.3117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,8,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,4,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,32768,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,128,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,32,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,512,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,16,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,1024,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,2048,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,64,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,65536,0.2046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,16384,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,256,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,131072,0.3228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,2,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,65536,0.2189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,4096,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,8,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,8192,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,32768,0.1598
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,128,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,64,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,16,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,4,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,512,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,32,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,2048,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,16384,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,32768,0.1619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,8192,0.1265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,131072,0.3351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,2,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,4096,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,8,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,16,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,4,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,65536,0.2260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,32,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,256,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,64,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,1024,0.1069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,2048,0.1161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,256,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,1024,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,8192,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,128,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,16384,0.1374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,512,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,2,0.1017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,4,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,131072,0.3613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,4096,0.1184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,65536,0.2404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,32,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,32768,0.1670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,256,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,128,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,64,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,8,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,16,0.1028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,8192,0.1417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,1024,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,512,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,16384,0.1569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,131072,0.3920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,2048,0.1240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,4,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,65536,0.2615
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,8,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,32768,0.1907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,2,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,32,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,256,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,128,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,512,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,2048,0.1379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,4096,0.1468
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,64,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,8192,0.1579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,16384,0.1726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,32768,0.2080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,65536,0.2903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,1024,0.1178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,4,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,4096,0.1262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,16,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,32,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,2,0.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,128,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,64,0.1082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,512,0.1223
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,256,0.1188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,8,0.1079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,131072,0.4410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,2048,0.1669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,16,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,32768,0.2517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,4096,0.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,16384,0.2046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,8192,0.1844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,131072,0.5422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,65536,0.3545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,8,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,1024,0.1394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,4,0.1192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,64,0.1168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,256,0.1275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,16,0.1179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,32,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,2,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,1024,0.1635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,512,0.1451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,4096,0.2152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,8192,0.2308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,65536,0.4725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,128,0.1240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,32768,0.3270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,2048,0.2041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,16384,0.2640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,4,0.1428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,8,0.1459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,32,0.1426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,16,0.1426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,64,0.1429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,2,0.1405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,512,0.1906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,256,0.1683
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,2048,0.2912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,4096,0.3066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,128,0.1609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,131072,0.7419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,32768,0.5079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,8192,0.3341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,1024,0.2190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,8,0.1920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,65536,0.7398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,4,0.1906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,131072,1.2986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,16384,0.3945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,16,0.1930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,2,0.2035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,32,0.1909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,512,0.2615
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,2048,0.4688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,256,0.2311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,1024,0.3333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,16384,0.6613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,128,0.2111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,4096,0.5177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,64,0.1948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,32768,0.8944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,8,0.2895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,2,0.3101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,8192,0.5496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,16,0.3134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,64,0.2915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,256,0.3724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,32,0.2885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,65536,1.4116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,128,0.3190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,4,0.2874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,512,0.4123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,1024,0.5508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,16384,1.1614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,4096,0.8665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,2048,0.7865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,8,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,2,0.0866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,32768,1.6000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,4,0.0887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,8192,0.9460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,128,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,64,0.0870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,16,0.0867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,32,0.0893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,1024,0.0890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,2048,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,16384,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,8192,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,32768,0.1477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,256,0.0887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,4096,0.1039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,512,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,4,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,65536,0.2016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,2,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,131072,0.3083
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,8,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,64,0.0979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,256,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,16,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,32,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,4096,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,8192,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,2048,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,16384,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,512,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,32768,0.1560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,2,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,4,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,131072,0.3163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,1024,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,32,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,128,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,8,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,16,0.0980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,512,0.1002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,256,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,128,0.0982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,1024,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,8192,0.1234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,2048,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,16384,0.1331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,4096,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,32768,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,65536,0.2148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,131072,0.3339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,4,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,64,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,32,0.0978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,16,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,65536,0.2208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,256,0.0980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,512,0.1008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,8,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,64,0.0987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,1024,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,2,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,2048,0.1141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,128,0.1005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,4096,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,16384,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,131072,0.3529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,8192,0.1233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,65536,0.2365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,16,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,32,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,4,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,2,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,64,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,128,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,256,0.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,8,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,512,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,4096,0.1261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,8192,0.1348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,16384,0.1434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,2048,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,65536,0.2566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,32768,0.1643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,32768,0.1846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,2,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,131072,0.3869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,1024,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,8,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,16,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,256,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,64,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,512,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,2048,0.1336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,32,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,4096,0.1407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,1024,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,16384,0.1704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,8192,0.1542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,131072,0.4346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,32768,0.2048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,4,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,2,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,8,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,16,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,32,0.1060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,65536,0.2878
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,4,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,128,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,128,0.1082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,256,0.1129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,4096,0.1703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,8192,0.1795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,64,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,16384,0.1999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,32768,0.2460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,2048,0.1640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,131072,0.5388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,1024,0.1328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,8,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,512,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,2,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,65536,0.3503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,64,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,128,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,256,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,512,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,32,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,2048,0.2028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,8192,0.2259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,4096,0.2100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,4,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,1024,0.1598
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,16,0.1145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,65536,0.4675
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,131072,0.7365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,4,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,32768,0.3251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,2,0.1365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,8,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,16384,0.2588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,16,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,32,0.1375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,512,0.1791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,256,0.1670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,1024,0.2128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,4096,0.2979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,16384,0.3816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,8192,0.3275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,2048,0.2859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,64,0.1393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,128,0.1457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,2,0.1869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,131072,1.2884
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,32768,0.5066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,4,0.1866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,8,0.1973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,32,0.1870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,64,0.1889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,65536,0.7334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,128,0.2052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,512,0.2573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,16,0.1848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,1024,0.3238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,4096,0.4910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,256,0.2339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,8192,0.5423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,16384,0.6513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,32768,0.8876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,2,0.2980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,2048,0.4593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,4,0.2802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,65536,1.3804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,32,0.2772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,8,0.2980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,128,0.3294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,64,0.2816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,16,0.2781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,1024,0.5121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,8192,0.9467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,4096,0.8480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,2048,0.7683
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,16384,1.1428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,256,0.3369
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,512,0.3985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,2,0.0873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,16,0.0885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,8,0.0848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,32768,1.5799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,256,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,64,0.0866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,128,0.0870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,1024,0.0872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,4,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,2048,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,16384,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,8192,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,4096,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,65536,0.2032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,131072,0.3074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,4,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,512,0.0906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,16,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,32,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,32768,0.1407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,64,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,128,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,2,0.0938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,8,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,2048,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,4096,0.1149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,8192,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,1024,0.0956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,256,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,16384,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,131072,0.3118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,2,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,65536,0.2148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,8,0.0987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,512,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,32,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,4,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,128,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,64,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,32768,0.1544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,16,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,2048,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,4096,0.1160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,256,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,16384,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,32768,0.1572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,65536,0.2176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,131072,0.3343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,2,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,512,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,8,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,8192,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,16,0.0978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,64,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,128,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,32,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,1024,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,2048,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,512,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,256,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,16384,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,32768,0.1627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,8192,0.1198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,131072,0.3532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,4,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,4,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,2,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,8,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,16,0.0984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,64,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,65536,0.2392
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,128,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,256,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,1024,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,1024,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,512,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,2048,0.1234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,8192,0.1290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,4096,0.1220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,32,0.0987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,65536,0.2567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,32768,0.1863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,16384,0.1438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,4,0.0998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,8,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,32,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,16,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,131072,0.3875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,64,0.1008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,256,0.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,128,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,2,0.1000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,512,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,4096,0.1404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,8192,0.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,1024,0.1104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,32768,0.2019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,65536,0.2862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,2048,0.1359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,16384,0.1664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,4,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,8,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,16,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,64,0.1044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,2,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,128,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,32,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,1024,0.1270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,256,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,4096,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,2048,0.1622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,512,0.1172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,4096,0.1694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,131072,0.4347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,16384,0.1981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,8192,0.1776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,131072,0.5365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,65536,0.3445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,4,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,16,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,32,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,32768,0.2435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,2,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,8,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,128,0.1172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,512,0.1383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,64,0.1126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,1024,0.1584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,4096,0.2101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,256,0.1221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,32768,0.3235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,8192,0.2269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,2048,0.1998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,2,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,65536,0.4656
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,16384,0.2549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,8,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,64,0.1371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,32,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,16,0.1395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,131072,0.7378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,4,0.1356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,256,0.1588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,128,0.1470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,1024,0.2087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,4096,0.3008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,512,0.1831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,16384,0.3807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,2048,0.2804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,2,0.1952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,32768,0.5026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,131072,1.2836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,8192,0.3267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,8,0.1849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,65536,0.7284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,4,0.1946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,32,0.1968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,64,0.1870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,128,0.2042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,2048,0.4589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,512,0.2548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,256,0.2216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,8192,0.5379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,16384,0.6512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,4096,0.5013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,32768,0.8848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,65536,1.4053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,1024,0.3240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,2,0.2732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,16,0.1874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,4,0.2735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,16,0.2753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,64,0.2981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,32,0.2937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,512,0.3909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,128,0.3038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,256,0.3312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,8,0.2748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,8192,0.9436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,2048,0.7636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,16384,1.1399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,4096,0.8411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,1024,0.5193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,2,0.0877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,32768,1.5734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,4,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,64,0.0847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,128,0.0885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,16,0.0849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,32,0.0849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,512,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,8,0.0861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,256,0.0902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,4096,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,16384,0.1163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,1024,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,32768,0.1396
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,65536,0.2015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,2048,0.0978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,131072,0.3057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,8,0.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,8192,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,16,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,2,0.0958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,128,0.0977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,4,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,512,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,2048,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,4096,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,32,0.0956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,1024,0.0954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,8192,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,64,0.0957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,131072,0.3169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,65536,0.2153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,32768,0.1535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,8,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,256,0.0977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,4,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,16,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,16384,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,64,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,128,0.0953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,512,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,1024,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,4096,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,8192,0.1191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,2,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,2048,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,32,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,32768,0.1566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,16384,0.1295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,4,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,2,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,131072,0.3282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,8,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,256,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,128,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,32,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,64,0.0961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,512,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,65536,0.2162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,16,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,4096,0.1150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,256,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,32768,0.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,8192,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,16384,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,2,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,131072,0.3525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,1024,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,8,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,16,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,32,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,2048,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,4,0.0979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,65536,0.2319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,512,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,128,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,2048,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,256,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,4096,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,64,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,32768,0.1838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,16384,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,2,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,65536,0.2534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,8192,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,8,0.1000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,16,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,4,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,64,0.0987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,32,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,512,0.1067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,1024,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,128,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,2048,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,8192,0.1496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,1024,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,16384,0.1673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,131072,0.3854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,32768,0.2022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,256,0.1020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,65536,0.2849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,131072,0.4337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,8,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,16,0.1056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,4,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,2,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,128,0.1057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,256,0.1088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,64,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,2048,0.1625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,4096,0.1669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,4096,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,512,0.1143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,16384,0.1962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,32768,0.2436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,131072,0.5349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,65536,0.3459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,1024,0.1314
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,4,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,32,0.1041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,8,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,16,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,8192,0.1761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,64,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,512,0.1370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,256,0.1203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,1024,0.1543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,2,0.1147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,8192,0.2254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,32,0.1125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,16384,0.2558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,128,0.1144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,65536,0.4661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,4096,0.2085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,2,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,4,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,8,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,16,0.1391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,64,0.1346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,128,0.1436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,2048,0.2010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,256,0.1580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,512,0.1822
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,4096,0.2958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,131072,0.7378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,1024,0.2082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,16384,0.3854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,8192,0.3223
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,32768,0.4977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,2048,0.2797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,2,0.1939
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,32,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,131072,1.2818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,4,0.1832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,32768,0.3208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,65536,0.7346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,8,0.1829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,16,0.1839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,32,0.1951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,256,0.2212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,128,0.2038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,64,0.1850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,1024,0.3210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,512,0.2537
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,16384,0.6500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,8192,0.5487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,4096,0.4897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,4,0.2730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,2048,0.4579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,8,0.2928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,32768,0.8802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,2,0.2929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,64,0.2770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,128,0.3027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,16,0.2718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,256,0.3504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,1024,0.5198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,2048,0.7612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,4096,0.8165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,32,0.2733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,512,0.3898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,4,0.0846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,16384,1.1521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,2,0.0848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,32768,1.5913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,8192,0.9180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,16,0.0871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,32,0.0848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,256,0.0850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,64,0.0848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,128,0.0866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,1024,0.0868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,65536,1.3792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,512,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,2048,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,4096,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,16384,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,32768,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,8192,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,2,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,4,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,8,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,16,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,65536,0.2029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,64,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,131072,0.3063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,256,0.0959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,128,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,2048,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,32,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,1024,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,8192,0.1179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,16384,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,512,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,32768,0.1499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,4096,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,131072,0.3143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,2,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,4,0.0961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,8,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,32,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,16,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,256,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,64,0.0959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,128,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,1024,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,512,0.0963
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,4096,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,65536,0.2137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,8192,0.1184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,32768,0.1567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,16384,0.1294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,2,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,2048,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,8,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,4,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,131072,0.3295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,65536,0.2136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,128,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,64,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,512,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,1024,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,16,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,2048,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,16384,0.1296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,32,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,32768,0.1567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,65536,0.2302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,256,0.0965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,4,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,8,0.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,2,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,16,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,131072,0.3543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,4096,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,32,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,256,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,64,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,1024,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,512,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,8192,0.1269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,16384,0.1448
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,4096,0.1202
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,2048,0.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,65536,0.2532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,32768,0.1848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,2,0.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,4,0.1000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,128,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,8,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,16,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,32,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,128,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,512,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,64,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,1024,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,4096,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,256,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,16384,0.1669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,131072,0.3845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,2048,0.1326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,65536,0.2866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,8192,0.1498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,2,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,32768,0.2015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,4,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,16,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,32,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,131072,0.4331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,8,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,256,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,512,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,64,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,2048,0.1601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,4096,0.1666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,128,0.1061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,32768,0.2431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,16384,0.1974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,8192,0.1758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,65536,0.3437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,8192,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,2,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,4,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,8,0.1123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,16,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,1024,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,32,0.1109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,256,0.1180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,64,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,512,0.1342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,131072,0.5346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,4096,0.2054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,1024,0.1561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,128,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,32768,0.3200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,16384,0.2528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,8192,0.2242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,65536,0.4615
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,2048,0.2014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,131072,0.7356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,16,0.1324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,2,0.1324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,8,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,128,0.1475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,64,0.1359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,4,0.1375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,32,0.1334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,512,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,2048,0.2807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,256,0.1575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,1024,0.2097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,16384,0.3841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,32768,0.4987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,8192,0.3228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,2,0.1812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,4096,0.2990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,8,0.1936
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,4,0.1830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,65536,0.7266
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,128,0.2137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,16,0.1815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,256,0.2323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,512,0.2540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,1024,0.3120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,64,0.1851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,2048,0.4641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,8192,0.5459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,16384,0.6484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,32,0.1813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,32768,0.8816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,4,0.2912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,2,0.3004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,8,0.2731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,131072,1.2870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,65536,1.4011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,4096,0.4841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,16,0.2704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,32,0.2717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,128,0.3239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,256,0.3310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,64,0.2958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,2048,0.7559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,512,0.3836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,16384,1.1486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,1024,0.5012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,4096,0.8130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,8,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,8192,0.9151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,4,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,128,0.1349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,64,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,256,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,32768,1.5675
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,512,0.1365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,2048,0.1408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,32,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,4096,0.1537
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,16,0.1321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,1024,0.1411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,16384,0.1693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,8192,0.1594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,32768,0.1918
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,65536,0.2515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,131072,0.3556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,8,0.1428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,2,0.1405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,4,0.1418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,32,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,256,0.1447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,64,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,512,0.1428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,1024,0.1470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,16,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,4096,0.1663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,8192,0.1669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,128,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,32768,0.2050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,2048,0.1606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,65536,0.2645
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,4,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,131072,0.3690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,8,0.1428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,2,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,32,0.1432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,64,0.1412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,128,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,16,0.1412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,256,0.1444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,512,0.1451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,1024,0.1481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,16384,0.1798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,2048,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,32768,0.2055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,16384,0.1826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,65536,0.2677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,4096,0.1691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,131072,0.3738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,8,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,4,0.1406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,32,0.1429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,16,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,8192,0.1706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,2,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,128,0.1445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,256,0.1452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,2048,0.1650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,16384,0.1848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,32768,0.2076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,4096,0.1661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,512,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,8192,0.1734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,131072,0.3905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,64,0.1410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,65536,0.2736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,2,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,1024,0.1506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,4,0.1434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,16,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,512,0.1490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,2048,0.1672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,4096,0.1704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,8192,0.1771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,16384,0.1891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,32768,0.2178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,131072,0.4194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,2,0.1481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,32,0.1459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,128,0.1461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,256,0.1475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,8,0.1468
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,65536,0.2893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,64,0.1438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,4,0.1485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,64,0.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,128,0.1530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,1024,0.1522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,16,0.1484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,256,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,512,0.1566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,2048,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,8,0.1486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,32768,0.2364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,8192,0.1866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,16384,0.2006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,1024,0.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,131072,0.4678
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,32,0.1475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,65536,0.3201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,16,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,64,0.1569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,32,0.1561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,4096,0.1789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,2,0.1558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,128,0.1576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,4096,0.1921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,8,0.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,8192,0.2021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,16384,0.2229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,32768,0.2701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,65536,0.3717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,131072,0.5633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,2,0.1756
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,4,0.1752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,8,0.1750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,16,0.1750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,32,0.1748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,64,0.1756
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,128,0.1770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,256,0.1803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,512,0.1847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,1024,0.1940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,2048,0.2258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,4096,0.2339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,8192,0.2491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,16384,0.2827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,32768,0.3476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,65536,0.4888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,131072,0.7634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,2,0.2180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,4,0.1570
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,256,0.1585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,512,0.1608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,8,0.2173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,16,0.2172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,1024,0.1655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,32,0.2198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,64,0.2192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,128,0.2258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,256,0.2259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,1024,0.2578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,512,0.2362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,2048,0.3091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,8192,0.3490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,4096,0.3203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,32768,0.5247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,65536,0.7556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,2,0.3063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,131072,1.3165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,16384,0.4106
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,4,0.3038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,4,0.2176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,8,0.3058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,32,0.3062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,16,0.3040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,1024,0.3737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,128,0.3140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,64,0.3190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,2048,0.1887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,256,0.3221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,2048,0.4669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,8192,0.5473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,16384,0.6617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,2,0.5254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,65536,1.4144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,8,0.5236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,32768,0.8996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,32,0.5260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,512,0.3385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,128,0.5384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,256,0.5748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,512,0.5876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,64,0.5244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,1024,0.6557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,4096,0.4927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,32768,1.6209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,2,0.1067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,4,0.1059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,8,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,16,0.5256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,4,0.5244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,8192,0.9775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,16384,1.1927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,32,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,64,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,512,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,256,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,1024,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,2048,0.8212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,4096,0.1241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,8192,0.1341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,128,0.1060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,2048,0.1213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,32768,0.1642
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,65536,0.2201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,131072,0.3314
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,4,0.1137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,4096,0.8916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,8,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,16,0.1120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,2,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,16,0.1056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,64,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,128,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,1024,0.1186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,256,0.1146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,512,0.1143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,32,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,8192,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,2048,0.1265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,16384,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,2,0.1141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,16384,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,131072,0.3491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,8,0.1142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,16,0.1143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,4,0.1158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,32768,0.1743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,64,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,256,0.1159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,512,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,1024,0.1207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,128,0.1160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,4096,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,2048,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,8192,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,16384,0.1542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,32768,0.1822
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,32,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,4096,0.1341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,65536,0.2340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,65536,0.2459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,2,0.1162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,4,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,8,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,16,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,32,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,131072,0.3550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,512,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,256,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,1024,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,2048,0.1388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,128,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,16384,0.1605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,64,0.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,131072,0.3653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,65536,0.2510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,4096,0.1398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,32768,0.1878
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,16,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,32,0.1180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,64,0.1190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,8192,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,128,0.1231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,512,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,2,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,8,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,1024,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,8192,0.1596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,4,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,32768,0.1995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,2048,0.1485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,131072,0.4006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,4096,0.1532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,256,0.1248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,65536,0.2722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,16384,0.1729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,4,0.1207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,32,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,64,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,8,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,512,0.1295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,2,0.1213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,256,0.1263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,16,0.1209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,128,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,1024,0.1374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,2048,0.1603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,4096,0.1666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,2,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,65536,0.3067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,4,0.1296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,8,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,16384,0.1868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,32768,0.2253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,8192,0.1712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,16,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,32,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,64,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,128,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,131072,0.4545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,2048,0.1953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,4096,0.2005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,8192,0.2087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,16384,0.2305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,32768,0.2779
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,65536,0.3815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,2,0.1473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,256,0.1391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,8,0.1459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,4,0.1474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,32,0.1466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,64,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,128,0.1560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,256,0.1616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,512,0.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,1024,0.1975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,4096,0.2653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,2048,0.2578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,8192,0.2822
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,32768,0.3807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,131072,0.7932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,16384,0.3121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,2,0.1811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,65536,0.5225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,1024,0.1582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,8,0.1799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,512,0.1473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,131072,0.5713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,16,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,16,0.1808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,32,0.1805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,4,0.1791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,2048,0.3648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,512,0.2315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,256,0.2076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,8192,0.4083
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,16384,0.4653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,32768,0.5894
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,1024,0.2694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,2,0.2528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,65536,0.8159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,4,0.2625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,8,0.2509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,131072,1.3716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,16,0.2509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,32,0.2547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,64,0.2639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,128,0.2821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,512,0.3431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,256,0.2988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,1024,0.4327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,2048,0.6345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,4096,0.6509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,16384,0.8123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,8192,0.7047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,32768,1.0465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,128,0.1967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,64,0.1804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,4096,0.3787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,4,0.4103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,65536,1.5774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,8,0.4082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,16,0.4118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,2,0.4085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,128,0.4762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,512,0.6117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,32,0.4093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,64,0.4088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,256,0.5054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,1024,0.7693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,2048,1.1240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,8192,1.2793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,16384,1.5134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,4096,1.1740
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,32,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,8,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,32768,1.9238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,64,0.0935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,128,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,256,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,512,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,2048,0.1076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,4096,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,4,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,1024,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,8192,0.1167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,32768,0.1493
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,16384,0.1254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,2,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,4,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,65536,0.2119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,131072,0.3160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,2,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,8,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,16,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,64,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,128,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,32,0.1017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,512,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,256,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,1024,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,8192,0.1265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,16384,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,32768,0.1639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,131072,0.3247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,4096,0.1213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,2,0.1057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,2048,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,65536,0.2238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,32,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,64,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,128,0.1057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,16,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,512,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,4,0.1044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,2048,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,256,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,4096,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,8192,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,32768,0.1658
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,8,0.1060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,65536,0.2325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,131072,0.3387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,1024,0.1101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,2,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,16384,0.1368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,32,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,8,0.1056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,64,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,256,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,128,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,4,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,16,0.1061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,512,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,2048,0.1220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,8192,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,4096,0.1252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,1024,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,131072,0.3585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,65536,0.2442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,32768,0.1733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,2,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,32,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,16,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,128,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,64,0.1079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,256,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,512,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,2048,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,1024,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,4096,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,8192,0.1383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,65536,0.2566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,131072,0.3877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,2,0.1080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,4,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,8,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,32,0.1076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,64,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,128,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,256,0.1121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,512,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,1024,0.1183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,2048,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,4096,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,16384,0.1687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,32768,0.2047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,65536,0.2871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,16384,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,131072,0.4368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,2,0.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,4,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,8,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,4,0.1117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,8,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,16,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,32,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,64,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,128,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,8192,0.1526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,512,0.1219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,16,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,256,0.1183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,1024,0.1332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,2048,0.1603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,8192,0.1736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,32768,0.2422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,16384,0.1943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,16384,0.1527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,65536,0.3443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,4096,0.1622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,8,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,4,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,16,0.1280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,32,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,2,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,131072,0.5328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,512,0.1476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,64,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,128,0.1315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,256,0.1377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,8192,0.2244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,4096,0.2058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,16384,0.2531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,2048,0.1972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,131072,0.7374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,65536,0.4626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,32768,0.3189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,2,0.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,4,0.1545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,8,0.1545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,32,0.1559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,128,0.1637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,64,0.1543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,16,0.1585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,1024,0.2105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,512,0.1831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,256,0.1695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,8192,0.3203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,4096,0.2864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,32768,0.4916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,32768,0.1844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,2048,0.2733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,65536,0.7219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,2,0.2073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,131072,1.2868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,4,0.2087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,16384,0.3759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,8,0.2078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,1024,0.1583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,32,0.2073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,64,0.2188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,128,0.2248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,2048,0.4228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,512,0.2583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,4096,0.4525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,8192,0.5155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,16,0.2078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,256,0.2344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,1024,0.3084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,2,0.3218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,16384,0.6133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,4,0.3428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,8,0.3204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,16,0.3223
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,64,0.3219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,256,0.3752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,65536,1.3485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,32,0.3226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,32768,0.8432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,512,0.4220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,128,0.3771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,4096,0.7830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,1024,0.5178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,2048,0.7321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,8,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,16384,1.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,32768,1.5322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,32,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,8192,0.8869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,4,0.0905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,128,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,512,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,2,0.0919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,256,0.0937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,2048,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,4096,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,1024,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,16,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,32768,0.1485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,131072,0.3074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,16384,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,8192,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,8,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,16,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,32,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,2,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,128,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,65536,0.2032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,4,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,64,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,1024,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,64,0.0999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,8192,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,512,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,32768,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,2048,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,16384,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,131072,0.3164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,4,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,2,0.1018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,256,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,4096,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,65536,0.2158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,128,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,64,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,16,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,8,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,256,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,1024,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,8192,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,4096,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,16384,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,32,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,32768,0.1603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,65536,0.2201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,2048,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,4,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,16,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,131072,0.3326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,512,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,8,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,32,0.1001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,2,0.1006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,1024,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,64,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,256,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,512,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,2048,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,128,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,8192,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,131072,0.3511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,65536,0.2330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,2,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,8,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,16384,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,32768,0.1648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,4096,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,64,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,256,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,4,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,32,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,16,0.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,4096,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,2048,0.1224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,8192,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,512,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,128,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,65536,0.2506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,16384,0.1428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,1024,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,8,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,131072,0.3823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,32,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,64,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,4,0.1041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,128,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,2,0.1043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,512,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,1024,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,256,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,16,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,32768,0.1788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,32768,0.1948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,2048,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,8192,0.1405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,2,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,4,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,131072,0.4265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,4096,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,16,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,64,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,16384,0.1576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,128,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,32,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,65536,0.2783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,2048,0.1464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,512,0.1145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,8,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,8192,0.1607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,32768,0.2299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,65536,0.3308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,1024,0.1185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,131072,0.5212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,2,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,4,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,256,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,16384,0.1830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,32,0.1167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,64,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,4096,0.1522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,16,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,128,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,512,0.1267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,8,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,2048,0.1677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,4096,0.1781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,8192,0.1935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,32768,0.2912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,1024,0.1413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,256,0.1212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,2,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,4,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,16,0.1384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,131072,0.7059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,8,0.1391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,65536,0.4335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,16384,0.2235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,64,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,512,0.1612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,1024,0.1770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,2048,0.2220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,4096,0.2384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,128,0.1432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,32,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,8192,0.2758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,32768,0.4399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,256,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,65536,0.6712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,2,0.1853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,16384,0.3228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,32,0.1853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,16,0.1842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,131072,1.2259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,128,0.1952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,4,0.1869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,512,0.2225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,8,0.1964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,256,0.2043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,4096,0.3745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,1024,0.2541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,2048,0.3453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,32768,0.7776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,8192,0.4268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,16384,0.5376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,4,0.2777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,8,0.2770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,2,0.2773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,64,0.2771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,65536,1.2865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,16,0.2977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,32,0.2782
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,128,0.2971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,256,0.3126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,512,0.3415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,1024,0.4046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,16384,0.9286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,4096,0.6299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,64,0.1845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,4,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,32768,1.3610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,8192,0.7158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,2048,0.5596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,32,0.0887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,64,0.0897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,8,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,2,0.0875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,1024,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,512,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,128,0.0876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,8192,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,16,0.0866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,256,0.0914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,2048,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,131072,0.3064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,16384,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,65536,0.1999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,8,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,4096,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,32768,0.1436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,2,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,4,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,128,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,16,0.0977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,1024,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,2048,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,64,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,32,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,256,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,8192,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,32768,0.1579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,131072,0.3166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,512,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,2,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,4,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,4096,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,65536,0.2106
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,32,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,64,0.0981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,16384,0.1275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,512,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,256,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,1024,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,2048,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,8192,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,4096,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,16,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,8,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,131072,0.3265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,65536,0.2188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,32768,0.1569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,128,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,16,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,8,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,32,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,64,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,256,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,128,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,16384,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,1024,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,4,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,512,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,16384,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,2,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,8192,0.1204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,4096,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,32768,0.1620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,131072,0.3511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,2,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,65536,0.2279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,4,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,2048,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,64,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,128,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,256,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,16,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,32,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,4096,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,2048,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,8192,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,8,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,65536,0.2508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,32768,0.1754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,1024,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,4,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,2,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,8,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,512,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,64,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,131072,0.3788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,16384,0.1379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,512,0.1045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,1024,0.1060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,128,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,32,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,2048,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,16384,0.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,8192,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,65536,0.2749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,32768,0.1906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,2,0.1056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,256,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,4096,0.1285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,8,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,131072,0.4210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,16,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,32,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,64,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,4,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,128,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,512,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,4096,0.1474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,1024,0.1159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,8192,0.1575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,16384,0.1785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,65536,0.3278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,256,0.1089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,131072,0.5177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,2,0.1150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,32768,0.2249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,16,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,4,0.1129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,16,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,128,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,64,0.1141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,8,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,512,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,1024,0.1374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,256,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,2048,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,2048,0.1659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,16384,0.2225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,32768,0.2868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,65536,0.4310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,4096,0.1762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,32,0.1123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,131072,0.7020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,8192,0.1913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,8,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,2,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,16,0.1324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,64,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,128,0.1403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,4,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,1024,0.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,4096,0.2317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,8192,0.2599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,256,0.1444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,32,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,512,0.1564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,32768,0.4352
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,16384,0.3168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,2048,0.2253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,2,0.1773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,16,0.1778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,131072,1.2240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,65536,0.6662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,64,0.1789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,4,0.1880
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,8,0.1768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,32,0.1804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,128,0.1879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,256,0.1991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,512,0.2150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,2048,0.3364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,1024,0.2472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,2,0.2610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,32768,0.7568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,65536,1.2739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,8,0.2611
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,16384,0.5288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,8192,0.4163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,16,0.2626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,4096,0.3750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,32,0.2611
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,128,0.2835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,4,0.2614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,64,0.2629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,1024,0.3879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,256,0.2971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,512,0.3269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,4096,0.5920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,2048,0.5618
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,4,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,16,0.0850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,8192,0.6978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,32768,1.3432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,32,0.0847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,64,0.0870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,8,0.0859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,2,0.0882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,512,0.0868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,128,0.0866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,2048,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,256,0.0873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,16384,0.9087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,4096,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,8192,0.1084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,32768,0.1403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,16384,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,65536,0.2026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,4,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,16,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,2,0.0957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,1024,0.0912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,8,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,128,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,256,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,131072,0.3020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,32,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,2048,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,4096,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,16384,0.1260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,1024,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,64,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,512,0.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,65536,0.2097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,131072,0.3152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,8192,0.1190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,8,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,32,0.0956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,16,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,64,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,128,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,512,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,256,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,2048,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,2,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,32768,0.1508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,16384,0.1286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,1024,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,4,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,131072,0.3238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,32768,0.1565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,65536,0.2157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,2,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,8,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,16,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,64,0.0955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,128,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,32,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,512,0.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,256,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,4096,0.1149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,4,0.0961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,2048,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,8192,0.1180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,16384,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,4096,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,65536,0.2274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,2,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,1024,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,4,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,8192,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,32,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,131072,0.3482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,64,0.0978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,16,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,512,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,128,0.1000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,8,0.0985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,2048,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,1024,0.1020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,8192,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,16384,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,32768,0.1603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,4096,0.1229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,256,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,2,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,131072,0.3773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,65536,0.2485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,32,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,16,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,4,0.1001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,64,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,8,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,32768,0.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,1024,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,128,0.0999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,4096,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,256,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,8192,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,2048,0.1188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,131072,0.4211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,65536,0.2732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,512,0.1021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,32768,0.1901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,16384,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,32,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,2,0.1045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,64,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,128,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,256,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,512,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,2048,0.1392
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,4096,0.1445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,8,0.1039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,16,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,32768,0.2216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,4,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,65536,0.3243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,16384,0.1758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,4,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,8,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,1024,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,8192,0.1546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,32,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,2,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,131072,0.5141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,16,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,64,0.1105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,256,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,2048,0.1671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,128,0.1122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,16384,0.2190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,4096,0.1723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,65536,0.4295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,8192,0.1901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,2,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,4,0.1313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,512,0.1210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,1024,0.1363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,16,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,8,0.1342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,64,0.1315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,128,0.1351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,32,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,1024,0.1765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,512,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,131072,0.7005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,8192,0.2579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,2048,0.2159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,16384,0.3208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,4096,0.2294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,32768,0.4335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,32768,0.2869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,2,0.1747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,4,0.1851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,256,0.1427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,16,0.1744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,64,0.1771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,32,0.1778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,65536,0.6630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,128,0.1970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,512,0.2136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,256,0.1960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,1024,0.2445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,4096,0.3627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,131072,1.2213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,16384,0.5255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,8192,0.4235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,8,0.1763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,2,0.2563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,4,0.2565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,65536,1.2570
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,8,0.2772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,32768,0.7531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,16,0.2570
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,2048,0.3363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,64,0.2568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,256,0.2916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,512,0.3410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,2048,0.5357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,128,0.2768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,1024,0.3785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,16384,0.9061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,32,0.2572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,4,0.0856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,8,0.0872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,8192,0.6890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,32768,1.3588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,32,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,64,0.0857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,2,0.0865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,4096,0.5861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,512,0.0868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,256,0.0867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,16,0.0879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,128,0.0887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,1024,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,8192,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,16384,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,2048,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,4096,0.1020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,65536,0.2037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,32768,0.1392
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,16,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,32,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,2,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,131072,0.3058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,128,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,512,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,1024,0.0974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,4,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,8,0.0938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,4096,0.1109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,16384,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,256,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,64,0.0974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,65536,0.2093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,131072,0.3120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,8192,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,4,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,8,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,2048,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,32,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,128,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,16,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,32768,0.1535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,2,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,2048,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,512,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,4096,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,8192,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,1024,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,32768,0.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,131072,0.3239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,16384,0.1281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,65536,0.2173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,64,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,256,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,32,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,4,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,128,0.0958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,64,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,512,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,1024,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,16,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,4096,0.1137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,8,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,2,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,256,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,65536,0.2264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,131072,0.3467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,2048,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,2,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,4,0.0998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,16,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,16384,0.1282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,32768,0.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,8192,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,64,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,512,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,128,0.1001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,256,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,32,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,8,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,2048,0.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,32768,0.1713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,1024,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,16384,0.1383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,131072,0.3768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,65536,0.2452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,4,0.0976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,8,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,32,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,16,0.1005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,8192,0.1222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,4096,0.1167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,512,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,128,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,2048,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,64,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,1024,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,8192,0.1314
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,16384,0.1519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,256,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,65536,0.2715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,4096,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,2,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,32768,0.1890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,4,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,2,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,32,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,16,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,128,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,131072,0.4207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,256,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,1024,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,4096,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,512,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,8192,0.1535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,16384,0.1759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,8,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,65536,0.3249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,131072,0.5120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,64,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,2,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,4,0.1121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,32768,0.2242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,2048,0.1386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,16,0.1098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,64,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,256,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,8,0.1117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,2048,0.1646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,32,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,1024,0.1347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,128,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,32768,0.2847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,512,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,16384,0.2198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,4,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,131072,0.6981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,65536,0.4292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,8,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,4096,0.1694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,2,0.1289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,32,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,128,0.1341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,16,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,8192,0.1904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,2048,0.2162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,64,0.1305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,4096,0.2296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,256,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,16384,0.3150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,512,0.1536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,8192,0.2616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,2,0.1712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,65536,0.6625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,1024,0.1699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,8,0.1721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,16,0.1836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,32,0.1729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,64,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,131072,1.2205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,32768,0.4345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,4,0.1740
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,512,0.2249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,1024,0.2418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,128,0.1852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,4096,0.3620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,256,0.1953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,16384,0.5239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,2,0.2548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,2048,0.3318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,8192,0.4114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,65536,1.2762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,8,0.2550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,64,0.2559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,4,0.2552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,32768,0.7633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,32,0.2547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,256,0.2898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,128,0.2749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,2048,0.5323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,512,0.3187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,1024,0.3768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,8192,0.6871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,2,0.0873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,4,0.0868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,16384,0.8995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,8,0.0846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,16,0.0849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,32,0.0870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,32768,1.3327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,64,0.0868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,4096,0.6024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,16,0.2555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,1024,0.0897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,256,0.0866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,128,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,2048,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,512,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,4096,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,16384,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,65536,0.1983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,32768,0.1438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,4,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,8192,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,8,0.0956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,2,0.0938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,32,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,128,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,256,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,1024,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,2048,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,4096,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,131072,0.3057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,512,0.0956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,16,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,8192,0.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,64,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,32768,0.1519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,131072,0.3183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,4,0.0953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,8,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,16384,0.1241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,32,0.0954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,2,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,65536,0.2129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,16,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,512,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,1024,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,4096,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,256,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,16384,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,64,0.0974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,65536,0.2145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,32768,0.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,8192,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,2,0.0943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,2048,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,128,0.0956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,131072,0.3223
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,64,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,128,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,16,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,256,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,32,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,512,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,4,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,1024,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,8,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,32768,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,4096,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,8192,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,65536,0.2260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,2,0.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,8,0.0979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,131072,0.3454
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,16384,0.1287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,4,0.0960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,32,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,64,0.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,2048,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,256,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,16,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,128,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,8192,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,2048,0.1148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,16384,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,1024,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,131072,0.3744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,512,0.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,65536,0.2443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,4,0.0983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,8,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,32,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,16,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,64,0.0985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,256,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,512,0.1028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,1024,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,2048,0.1163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,128,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,4096,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,16384,0.1500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,32768,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,32768,0.1712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,131072,0.4190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,2,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,4,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,8192,0.1307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,8,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,65536,0.2712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,64,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,32,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,2,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,16,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,4096,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,2048,0.1371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,512,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,128,0.1047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,1024,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,8192,0.1522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,256,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,32768,0.2200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,16384,0.1747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,4,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,65536,0.3243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,16,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,8,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,4096,0.1430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,32,0.1100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,64,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,256,0.1147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,131072,0.5127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,512,0.1199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,2,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,4096,0.1692
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,2048,0.1630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,8192,0.1888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,16384,0.2166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,128,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,131072,0.6982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,32768,0.2859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,8,0.1289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,4,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,1024,0.1342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,2,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,128,0.1333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,65536,0.4281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,256,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,512,0.1545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,1024,0.1687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,16,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,2048,0.2139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,8192,0.2624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,16384,0.3137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,64,0.1280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,32,0.1285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,32768,0.4314
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,4,0.1725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,131072,1.2188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,65536,0.6608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,8,0.1722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,16,0.1729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,2,0.1835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,32,0.1731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,4096,0.2285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,64,0.1743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,512,0.2101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,1024,0.2477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,2048,0.3418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,128,0.1845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,16384,0.5223
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,4096,0.3593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,256,0.1937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,8192,0.4126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,4,0.2528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,16,0.2541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,8,0.2537
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,2,0.2753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,65536,1.2733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,64,0.2546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,128,0.2746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,32768,0.7511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,32,0.2549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,2048,0.5300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,256,0.2882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,4096,0.5805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,512,0.3167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,1024,0.3950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,16384,0.8992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,4,0.1585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,16,0.1601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,32768,1.3303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,8,0.1586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,2,0.1627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,32,0.1601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,8192,0.6857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,64,0.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,128,0.1630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,512,0.1681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,1024,0.1683
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,2048,0.2987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,256,0.1633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,8192,0.1927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,16384,0.2038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,32768,0.2258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,4096,0.1842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,65536,0.2832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,4,0.1705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,2,0.1718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,8,0.1704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,32,0.1685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,64,0.1687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,128,0.1733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,16,0.1700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,256,0.1758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,512,0.1795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,1024,0.1811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,4096,0.1996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,8192,0.2045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,2048,0.1902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,32768,0.2361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,65536,0.2968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,16384,0.2109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,131072,0.4019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,4,0.1737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,8,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,2,0.1725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,32,0.1736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,64,0.1713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,128,0.1760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,16,0.1731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,256,0.1801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,1024,0.1863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,2048,0.3177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,512,0.1843
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,8192,0.2041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,4096,0.1974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,16384,0.2159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,65536,0.3004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,32768,0.2375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,131072,0.4067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,4,0.1736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,8,0.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,32,0.1734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,16,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,64,0.1720
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,128,0.1813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,2,0.1735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,512,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,1024,0.1931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,256,0.1866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,2048,0.3189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,4096,0.2032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,8192,0.2114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,16384,0.2203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,32768,0.2457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,131072,0.4285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,2,0.1780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,4,0.1767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,8,0.1779
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,65536,0.3092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,16,0.1770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,32,0.1763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,64,0.1782
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,128,0.1842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,256,0.1828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,512,0.1896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,1024,0.1877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,2048,0.3216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,32768,0.2567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,4096,0.2099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,8192,0.2125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,2,0.1850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,65536,0.3255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,4,0.1848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,131072,0.4571
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,8,0.1871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,64,0.1847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,32,0.1835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,128,0.1874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,256,0.1926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,16,0.1856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,512,0.1932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,2048,0.3375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,1024,0.1996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,4096,0.2217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,8192,0.2316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,16384,0.2455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,32768,0.2812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,65536,0.3634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,131072,0.5127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,4,0.1952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,2,0.1994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,16,0.1967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,8,0.1993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,32,0.1966
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,64,0.1970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,128,0.2003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,512,0.2086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,1024,0.2159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,2048,0.3629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,4096,0.2504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,256,0.2025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,16384,0.2802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,32768,0.3281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,65536,0.4289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,2,0.2252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,131072,0.6218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,4,0.2266
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,8,0.2238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,8192,0.2596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,16,0.2248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,32,0.2240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,128,0.2318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,256,0.2339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,512,0.2442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,64,0.2248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,2048,0.4264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,1024,0.2636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,16384,0.2282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,8192,0.3333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,16384,0.3640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,4096,0.3163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,32768,0.4305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,65536,0.5746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,4,0.2725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,131072,0.8479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,8,0.2695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,2,0.2701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,32,0.2666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,64,0.2693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,16,0.2688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,128,0.2807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,512,0.3074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,1024,0.3436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,4096,0.4329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,256,0.2903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,16384,0.5207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,2048,0.6486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,32768,0.6425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,8192,0.4650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,2,0.3952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,8,0.3984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,65536,0.8708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,32,0.3994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,4,0.4033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,16,0.3956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,64,0.3980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,256,0.4319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,512,0.4686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,1024,0.5436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,2048,1.1311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,128,0.4133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,16384,0.8700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,4096,0.7094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,32768,1.1022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,2,0.6943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,8,0.6832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,4,0.7012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,65536,1.6262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,32,0.6844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,64,0.6920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,16,0.6888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,256,0.7440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,128,0.7190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,512,0.8299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,1024,0.9713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,2048,2.0023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,4096,1.2808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,8192,1.3806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,2,0.1129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,16384,1.5924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,4,0.1163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,8,0.1171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,16,0.1170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,32768,2.0085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,32,0.1126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,128,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,256,0.1224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,64,0.1171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,1024,0.1230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,2048,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,512,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,4096,0.1424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,32768,0.1819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,16384,0.1573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,8192,0.1491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,65536,0.2457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,2,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,4,0.1233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,8,0.1260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,16,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,131072,0.3409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,32,0.1270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,64,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,128,0.1285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,1024,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,2048,0.2649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,512,0.1295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,8192,0.1572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,4096,0.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,16384,0.1680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,256,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,32768,0.1933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,2,0.1281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,65536,0.2521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,131072,0.3584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,8,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,16,0.1231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,4,0.1282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,32,0.1294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,64,0.1275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,128,0.1313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,256,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,512,0.1363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,1024,0.1363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,2048,0.2711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,4096,0.1601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,65536,0.2607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,8192,0.1641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,131072,0.3662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,32768,0.2011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,16,0.1272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,2,0.1260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,32,0.1265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,4,0.1263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,8,0.1267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,128,0.1313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,256,0.1343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,64,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,1024,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,2048,0.2722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,512,0.1384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,4096,0.1608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,8192,0.1651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,32768,0.2033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,16384,0.1763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,131072,0.3793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,65536,0.2643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,2,0.1282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,4,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,8,0.1288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,32,0.1280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,64,0.1282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,128,0.1359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,16,0.1294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,256,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,1024,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,4096,0.1719
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,512,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,8192,0.1792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,16384,0.1911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,32768,0.2181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,65536,0.2902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,2048,0.2856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,2,0.1350
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,131072,0.4190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,8,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,16,0.1335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,4,0.1336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,64,0.1349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,32,0.1359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,256,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,128,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,1024,0.1570
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,512,0.1467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,4096,0.1951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,2048,0.3058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,8192,0.2038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,16384,0.2171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,32768,0.2539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,131072,0.4831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,16384,0.1747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,2,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,65536,0.3360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,4,0.1458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,8,0.1465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,32,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,64,0.1471
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,128,0.1555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,16,0.1461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,1024,0.1979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,2048,0.3762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,8192,0.2743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,4096,0.2647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,16384,0.2941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,256,0.1584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,32768,0.3399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,65536,0.4396
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,512,0.1727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,2,0.1621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,131072,0.6322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,16,0.1629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,32,0.1627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,64,0.1606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,4,0.1625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,256,0.1853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,512,0.2101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,8,0.1647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,128,0.1769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,2048,0.4772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,8192,0.3831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,1024,0.2569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,4096,0.3608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,32768,0.4804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,2,0.2076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,4,0.2072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,8,0.2080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,16,0.2049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,16384,0.4059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,32,0.2073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,65536,0.6246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,128,0.2284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,64,0.2077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,256,0.2495
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,1024,0.3853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,2048,0.7877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,512,0.2933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,4096,0.5760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,8192,0.6039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,32768,0.7786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,16384,0.6605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,65536,1.0111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,2,0.3065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,131072,1.5671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,4,0.3093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,16,0.3068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,8,0.3090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,32,0.3067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,64,0.3043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,256,0.3929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,128,0.3474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,512,0.4856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,1024,0.6618
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,4096,1.0453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,8192,1.0836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,2048,1.4593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,16384,1.2066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,2,0.5166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,4,0.5161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,8,0.5150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,65536,1.9418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,16,0.5149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,32,0.5128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,32768,1.4377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,64,0.5160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,128,0.6002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,256,0.6883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,1024,1.2273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,512,0.8767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,4096,1.9304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,16384,2.2681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,2048,2.6829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,2,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,4,0.0940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,32768,2.6752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,8,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,16,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,8192,2.0633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,32,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,64,0.0946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,256,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,512,0.0978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,1024,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,128,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,4096,0.1183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,32768,0.1580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,16384,0.1294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,131072,0.3176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,2048,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,2,0.1046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,65536,0.2172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,4,0.1027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,8,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,64,0.1026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,16,0.1018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,32,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,512,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,128,0.1066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,256,0.1063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,2048,0.1143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,8192,0.1302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,16384,0.1407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,1024,0.1090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,32768,0.1678
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,4096,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,2,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,4,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,65536,0.2274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,8,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,32,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,16,0.1045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,256,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,64,0.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,512,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,128,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,1024,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,2048,0.2456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,4096,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,8192,0.1332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,16384,0.1478
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,32768,0.1713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,65536,0.2310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,131072,0.3384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,2,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,4,0.1023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,8,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,16,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,8192,0.1222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,32,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,64,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,128,0.1058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,1024,0.1128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,512,0.1119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,4096,0.1359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,256,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,32768,0.1773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,2048,0.2473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,8192,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,16384,0.1509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,65536,0.2415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,4,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,131072,0.3564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,8,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,16,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,32,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,128,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,64,0.1064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,2,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,256,0.1137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,1024,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,4096,0.1407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,8192,0.1464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,2048,0.2532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,512,0.1190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,16384,0.1583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,65536,0.2572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,32768,0.1870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,4,0.1069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,8,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,131072,0.3889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,2,0.1085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,32,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,64,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,16,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,512,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,2048,0.2685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,1024,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,256,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,4096,0.1539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,16384,0.1775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,32768,0.2132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,65536,0.2969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,131072,0.4438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,8192,0.1616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,4,0.1150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,2,0.1137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,16,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,8,0.1143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,64,0.1150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,32,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,512,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,1024,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,2048,0.2945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,128,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,4096,0.1814
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,8192,0.1929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,256,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,16384,0.2111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,131072,0.5511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,32768,0.2603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,2,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,4,0.1281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,16,0.1263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,8,0.1286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,65536,0.3603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,32,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,64,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,128,0.1344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,512,0.1533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,1024,0.1879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,4096,0.2491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,256,0.1417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,2048,0.3625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,16384,0.3171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,8192,0.2716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,32768,0.3640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,131072,0.7792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,2,0.1569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,4,0.1576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,8,0.1568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,128,0.1178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,65536,0.5115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,16,0.1573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,32,0.1554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,256,0.1808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,128,0.1700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,2048,0.5984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,64,0.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,4096,0.3818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,16384,0.4633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,1024,0.2588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,512,0.2107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,65536,0.8184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,2,0.2233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,4,0.2218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,131072,1.3700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,32768,0.5841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,16,0.2224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,8192,0.4036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,8,0.2222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,128,0.2419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,64,0.2206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,32,0.2296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,1024,0.4151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,2048,1.0537
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,512,0.3180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,8192,0.6931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,4096,0.6477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,256,0.2654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,2,0.3595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,4,0.3601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,32768,1.0454
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,32,0.3589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,16384,0.8104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,64,0.3572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,65536,1.5390
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,128,0.4056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,16,0.3612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,8,0.3597
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,256,0.4708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,2048,1.8990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,512,0.5520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,8192,1.2879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,4096,1.1864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,1024,0.7514
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,4,0.0831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,8,0.0831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,2,0.0812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,32768,1.9210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,16,0.0820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,16384,1.5003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,64,0.0808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,512,0.0847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,128,0.0878
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,32,0.0805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,256,0.0858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,1024,0.0883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,2048,0.2180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,4096,0.1063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,16384,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,65536,0.2053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,131072,0.3027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,4,0.0914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,8192,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,16,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,32768,0.1415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,64,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,2,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,128,0.0924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,32,0.0906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,512,0.0937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,8,0.0924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,4096,0.1167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,8192,0.1179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,2048,0.2288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,32768,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,1024,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,256,0.0945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,131072,0.3171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,16384,0.1295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,65536,0.2160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,16,0.0896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,2,0.0901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,4,0.0914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,32,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,8,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,128,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,256,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,64,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,1024,0.0958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,8192,0.1180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,512,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,16384,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,32768,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,2048,0.2285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,65536,0.2165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,2,0.0903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,4096,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,4,0.0915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,32,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,16,0.0921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,64,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,256,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,512,0.0962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,128,0.0925
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,1024,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,131072,0.3208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,8192,0.1263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,16384,0.1372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,8,0.0913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,65536,0.2258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,32768,0.1606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,4096,0.1200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,4,0.0916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,2048,0.2334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,2,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,8,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,16,0.0906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,128,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,64,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,256,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,131072,0.3395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,1024,0.1020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,4096,0.1242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,512,0.1001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,16384,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,2048,0.2369
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,65536,0.2426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,32768,0.1698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,8192,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,32,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,4,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,2,0.0944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,8,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,64,0.0922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,128,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,16,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,32,0.0940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,1024,0.1081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,256,0.1003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,4096,0.1310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,512,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,16384,0.1540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,32768,0.1897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,65536,0.2725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,8192,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,2,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,4,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,8,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,2048,0.2456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,131072,0.8630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,16,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,256,0.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,128,0.1065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,64,0.1008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,1024,0.1233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,32,0.0984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,512,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,8192,0.1641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,2048,0.2671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,16384,0.1874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,65536,0.3350
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,4096,0.1559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,32768,0.2327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,4,0.1076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,8,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,131072,0.5235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,32,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,128,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,2,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,16,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,64,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,256,0.1184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,2048,0.3030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,512,0.1261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,4096,0.1917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,1024,0.1430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,8192,0.2096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,65536,0.4501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,131072,0.7229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,16384,0.2412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,32768,0.3074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,8,0.1282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,4,0.1294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,2,0.1291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,64,0.1295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,16,0.1331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,256,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,128,0.1395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,32,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,2048,0.4949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,512,0.1658
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,4096,0.2896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,8192,0.3135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,32768,0.4863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,16384,0.3690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,4,0.1776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,65536,0.7166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,2,0.1848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,131072,1.2715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,1024,0.1970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,16,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,8,0.1755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,128,0.2000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,256,0.2098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,64,0.1789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,1024,0.3033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,512,0.2405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,4096,0.4774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,2048,0.8888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,8192,0.5365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,32768,0.8763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,2,0.2757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,8,0.2740
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,65536,1.3678
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,16384,0.6394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,4,0.2755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,32,0.2897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,16,0.2756
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,128,0.3023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,64,0.2753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,256,0.3353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,512,0.3962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,2048,1.5549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,8192,0.9286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,16384,1.1399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,4096,0.8314
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,2,0.0707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,4,0.0698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,8,0.0702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,32768,1.5759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,1024,0.5154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,16,0.0705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,32,0.0712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,64,0.0709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,128,0.0726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,512,0.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,1024,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,4096,0.0926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,256,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,8192,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,2048,0.2053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,16384,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,65536,0.1909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,32768,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,2,0.0762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,8,0.0790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,4,0.0787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,16,0.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,32,0.0781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,32,0.1769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,64,0.0789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,131072,0.2943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,128,0.0773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,512,0.0807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,2048,0.2135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,4096,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,8192,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,16384,0.1169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,256,0.0804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,32768,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,4,0.0786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,8,0.0794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,16,0.0771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,1024,0.0804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,2,0.0783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,65536,0.2068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,128,0.0799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,64,0.0798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,512,0.0805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,256,0.0794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,1024,0.0836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,4096,0.1064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,2048,0.2168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,32,0.0776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,8192,0.1122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,65536,0.2107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,131072,0.3179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,2,0.0785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,16384,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,8,0.0809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,4,0.0779
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,32,0.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,64,0.0788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,16,0.0783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,128,0.0805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,256,0.0821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,1024,0.0895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,32768,0.1491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,4096,0.1129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,8192,0.1162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,512,0.0822
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,2048,0.2236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,16384,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,131072,0.3340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,65536,0.2174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,32768,0.1558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,4,0.0781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,16,0.0802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,2,0.0770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,64,0.0769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,32,0.0761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,128,0.0806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,512,0.0833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,1024,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,8,0.0786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,256,0.0828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,8192,0.1241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,2048,0.2294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,16384,0.1349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,65536,0.2353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,4096,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,4,0.0807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,131072,0.3624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,32768,0.1644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,16,0.0822
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,8,0.0807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,32,0.0806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,128,0.0853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,256,0.0857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,1024,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,2048,0.2368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,64,0.0804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,8192,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,4096,0.1252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,32768,0.1841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,16384,0.1473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,65536,0.2651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,131072,0.4140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,2,0.0912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,8,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,512,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,4,0.0900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,32,0.0900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,16,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,128,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,256,0.1048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,64,0.0922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,2048,0.2599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,1024,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,8192,0.1585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,4096,0.1487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,16384,0.1792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,512,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,32768,0.2256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,131072,0.5160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,65536,0.3262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,8,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,4,0.0978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,2,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,16,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,64,0.0997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,128,0.1056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,32,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,256,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,1024,0.1355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,512,0.1186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,2048,0.2979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,16384,0.2336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,4096,0.1867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,2,0.0815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,32768,0.3019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,65536,0.4445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,4,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,131072,0.7149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,8,0.1186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,2,0.1220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,16,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,8192,0.2033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,32,0.1201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,64,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,128,0.1283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,1024,0.1868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,2048,0.4890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,512,0.1554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,4096,0.2728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,8192,0.3003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,256,0.1374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,16384,0.3580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,65536,0.7044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,2,0.1704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,16,0.1604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,32768,0.4745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,131072,1.2621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,4,0.1609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,32,0.1617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,8,0.1627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,128,0.1787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,512,0.2268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,1024,0.2843
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,2048,0.8689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,4096,0.4611
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,64,0.1701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,8192,0.5118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,32768,0.8515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,16384,0.6230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,2,0.2421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,4,0.2435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,8,0.2597
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,256,0.1929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,65536,1.3508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,64,0.2446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,16,0.2449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,32,0.2430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,128,0.2717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,1024,0.4745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,256,0.3000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,4096,0.7921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,2048,1.5031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,8192,0.8930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,2,0.0664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,16384,1.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,32768,1.5358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,16,0.0647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,512,0.3766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,32,0.0644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,8,0.0646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,64,0.0652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,128,0.0648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,4,0.0663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,512,0.0664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,4096,0.0810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,256,0.0661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,2048,0.1993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,1024,0.0663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,65536,0.1812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,16384,0.0999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,131072,0.2845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,4,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,8192,0.0871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,16,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,2,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,32,0.0732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,64,0.0744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,8,0.0730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,512,0.0767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,1024,0.0764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,128,0.0748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,256,0.0747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,2048,0.2078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,4096,0.0924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,32768,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,8192,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,32768,0.1246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,65536,0.1983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,16384,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,8,0.0766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,131072,0.3049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,16,0.0744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,32,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,64,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,2,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,512,0.0762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,256,0.0763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,1024,0.0804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,4,0.0747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,4096,0.0981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,128,0.0748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,16384,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,2048,0.2092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,131072,0.3109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,2,0.0765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,65536,0.2070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,4,0.0759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,8192,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,16,0.0748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,32768,0.1456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,8,0.0766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,64,0.0762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,256,0.0771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,1024,0.0828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,2048,0.2158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,4096,0.1067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,128,0.0763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,8192,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,16384,0.1244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,65536,0.2177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,32768,0.1551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,2,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,4,0.0775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,131072,0.3344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,32,0.0749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,8,0.0772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,32,0.0769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,64,0.0763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,512,0.0789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,256,0.0785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,512,0.0804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,128,0.0796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,16,0.0766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,2048,0.2287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,8192,0.1227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,32768,0.1637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,1024,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,4096,0.1108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,65536,0.2359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,8,0.0771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,16384,0.1349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,4,0.0778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,2,0.0783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,32,0.0783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,64,0.0785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,256,0.0837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,128,0.0799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,512,0.0904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,2048,0.2380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,16,0.0778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,1024,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,8192,0.1310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,16384,0.1448
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,131072,0.3647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,131072,0.4124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,4096,0.1254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,32768,0.1829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,65536,0.2659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,4,0.0844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,16,0.0842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,8,0.0845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,2,0.0862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,64,0.0865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,256,0.1000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,32,0.0862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,128,0.0896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,512,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,1024,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,2048,0.2579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,16384,0.1762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,32768,0.2234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,65536,0.3247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,8192,0.1568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,4,0.0917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,8,0.0913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,4096,0.1459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,2,0.0892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,32,0.0920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,128,0.1005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,16,0.0917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,64,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,131072,0.5138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,1024,0.1322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,2048,0.2935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,256,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,8192,0.2014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,4096,0.1843
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,32768,0.2978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,512,0.1160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,131072,0.7115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,2,0.1145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,16384,0.2308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,8,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,4,0.1150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,16,0.1117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,32,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,64,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,512,0.1517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,65536,0.4402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,1024,0.1866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,128,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,256,0.1341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,4096,0.2675
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,8192,0.2966
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,2048,0.4796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,16384,0.3516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,2,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,65536,0.7050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,32768,0.4702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,8,0.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,131072,1.2567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,32,0.1542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,16,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,4,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,64,0.1546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,256,0.1874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,1024,0.2759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,512,0.2202
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,2048,0.8613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,8192,0.5044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,4096,0.4512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,2,0.2299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,32768,0.8455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,16384,0.6155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,4,0.2282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,128,0.1790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,16,0.2292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,32,0.2310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,64,0.2319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,8,0.2302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,65536,1.3702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,256,0.2895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,128,0.2574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,512,0.3634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,2048,1.4877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,4096,0.7755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,1024,0.4609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,2,0.0624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,8192,0.8743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,4,0.0604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,8,0.0600
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,32,0.0623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,32768,1.5271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,64,0.0622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,16384,1.0851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,128,0.0643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,16,0.0623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,512,0.0627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,256,0.0642
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,1024,0.0649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,2048,0.0699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,16384,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,4096,0.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,32768,0.1221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,8192,0.0846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,65536,0.1767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,2,0.0726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,8,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,4,0.0726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,32,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,16,0.0706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,131072,0.2765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,256,0.0726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,128,0.0714
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,512,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,2048,0.0807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,4096,0.0870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,16384,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,32768,0.1296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,65536,0.1933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,8192,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,131072,0.3012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,2,0.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,8,0.0722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,64,0.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,4,0.0704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,32,0.0706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,64,0.0726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,1024,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,256,0.0731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,16,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,128,0.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,512,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,1024,0.0762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,16384,0.1085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,4096,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,2048,0.2059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,8192,0.0981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,65536,0.2003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,2,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,131072,0.3127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,4,0.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,32768,0.1398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,16,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,8,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,32,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,256,0.0758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,128,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,1024,0.0788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,64,0.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,4096,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,512,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,8192,0.1006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,2048,0.2078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,65536,0.2156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,16384,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,4,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,131072,0.3297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,2,0.0724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,16,0.0724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,64,0.0733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,8,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,32,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,32768,0.1499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,512,0.0787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,2048,0.2205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,1024,0.0787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,4096,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,8192,0.1192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,128,0.0743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,16384,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,65536,0.2323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,32768,0.1605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,131072,0.3621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,2,0.0748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,8,0.0763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,256,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,32,0.0763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,4,0.0759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,128,0.0763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,512,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,256,0.0781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,64,0.0768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,1024,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,4096,0.1230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,8192,0.1303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,16384,0.1430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,16,0.0750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,131072,0.4128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,65536,0.2629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,4,0.0825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,2048,0.2361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,8,0.0834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,16,0.0828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,32,0.0824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,128,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,2,0.0827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,256,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,64,0.0832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,1024,0.1117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,32768,0.1810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,2048,0.2576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,4096,0.1456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,8192,0.1548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,512,0.1038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,16384,0.1747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,2,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,131072,0.5120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,32768,0.2218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,8,0.0885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,65536,0.3234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,32,0.0902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,64,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,4,0.0885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,128,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,256,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,1024,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,16,0.0891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,8192,0.1978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,512,0.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,2048,0.2928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,4096,0.1818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,65536,0.4385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,2,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,131072,0.7097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,8,0.1085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,32768,0.2948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,16384,0.2285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,16,0.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,32,0.1125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,4,0.1087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,128,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,256,0.1343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,1024,0.1772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,64,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,4096,0.2677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,16384,0.3487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,8192,0.2937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,32768,0.4733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,2048,0.4764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,4,0.1500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,512,0.1487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,131072,1.2553
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,8,0.1506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,16,0.1593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,65536,0.6957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,32,0.1504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,2,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,256,0.1862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,512,0.2162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,128,0.1674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,64,0.1513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,4096,0.4462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,2048,0.8590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,1024,0.2813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,8192,0.5010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,32768,0.8398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,16384,0.6108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,8,0.2227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,65536,1.3470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,16,0.2409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,2,0.2239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,32,0.2238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,4,0.2226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,128,0.2524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,64,0.2259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,1024,0.4688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,512,0.3375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,2048,1.4768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,8192,0.8662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,4096,0.7667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,256,0.2795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,2,0.0604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,8,0.0602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,16384,1.0790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,16,0.0605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,32,0.0600
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,64,0.0612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,128,0.0604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,256,0.0620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,512,0.0623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,4,0.0601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,2048,0.0670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,32768,1.5157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,8192,0.0802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,4096,0.0776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,1024,0.0643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,16384,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,131072,0.2771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,65536,0.1742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,8,0.0705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,32768,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,4,0.0694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,32,0.0703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,16,0.0726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,128,0.0706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,2,0.0703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,64,0.0705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,2048,0.0785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,256,0.0704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,512,0.0704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,4096,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,1024,0.0704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,8192,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,16384,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,131072,0.2957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,65536,0.1927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,4,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,32768,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,8,0.0702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,16,0.0709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,128,0.0722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,64,0.0705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,256,0.0727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,32,0.0704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,1024,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,2,0.0706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,2048,0.0805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,8192,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,4096,0.0906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,32768,0.1325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,16384,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,512,0.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,2,0.0705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,131072,0.3104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,16,0.0712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,8,0.0715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,65536,0.1970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,128,0.0726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,32,0.0743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,64,0.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,256,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,4,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,1024,0.0765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,8192,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,512,0.0724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,32768,0.1438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,65536,0.2128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,131072,0.3289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,4096,0.0901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,2,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,2048,0.2096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,16,0.0724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,8,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,64,0.0704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,16384,0.1105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,256,0.0741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,128,0.0762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,512,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,4,0.0707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,2048,0.2137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,32,0.0724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,4096,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,8192,0.1129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,32768,0.1614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,65536,0.2315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,16384,0.1315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,131072,0.3593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,4,0.0749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,1024,0.0787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,16,0.0739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,8,0.0726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,32,0.0743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,64,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,128,0.0765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,2,0.0743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,2048,0.2330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,512,0.0827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,4096,0.1220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,256,0.0763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,16384,0.1435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,65536,0.2607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,32768,0.1795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,1024,0.0923
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,2,0.0821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,8192,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,131072,0.4087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,16,0.0804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,4,0.0809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,8,0.0811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,32,0.0808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,64,0.0807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,128,0.0852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,512,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,2048,0.2555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,4096,0.1439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,16384,0.1731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,256,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,8192,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,1024,0.1108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,32768,0.2209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,65536,0.3225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,4,0.0856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,16,0.0865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,32,0.0879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,64,0.0872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,2,0.0867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,256,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,512,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,131072,0.5124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,8,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,1024,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,8192,0.1986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,2048,0.2920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,128,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,4096,0.1825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,32768,0.2974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,2,0.1065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,131072,0.7082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,4,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,8,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,65536,0.4396
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,16,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,128,0.1207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,256,0.1313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,16384,0.2291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,512,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,64,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,1024,0.1780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,32,0.1125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,8192,0.2924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,4096,0.2644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,32768,0.4684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,65536,0.6980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,16384,0.3498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,8,0.1496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,4,0.1481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,131072,1.2516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,2048,0.4780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,2,0.1567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,16,0.1490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,256,0.1831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,128,0.1667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,64,0.1496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,512,0.2142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,2048,0.8672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,32,0.1486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,16384,0.6076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,8192,0.4991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,32768,0.8402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,1024,0.2725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,2,0.2380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,4,0.2199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,4096,0.4469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,65536,1.3627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,8,0.2207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,16,0.2220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,64,0.2239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,128,0.2681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,32,0.2209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,512,0.3315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,4096,0.7607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,1024,0.4482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,256,0.2777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,8192,0.8649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,4,0.1588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,16384,1.0906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,32768,1.5124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,2,0.1619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,2048,1.4736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,16,0.1620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,8,0.1599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,32,0.1598
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,64,0.1586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,256,0.1621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,512,0.1646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,2048,0.2989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,4096,0.1838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,1024,0.1665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,128,0.1605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,32768,0.2229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,8192,0.1855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,2,0.1710
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,65536,0.2853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,16384,0.1960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,4,0.1708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,16,0.1699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,32,0.1705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,8,0.1684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,64,0.1697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,128,0.1723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,256,0.1700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,512,0.1729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,131072,0.3899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,2048,0.1831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,1024,0.1762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,4096,0.1935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,8192,0.1974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,65536,0.2954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,131072,0.3990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,16384,0.2110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,4,0.1715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,32768,0.2348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,8,0.1722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,16,0.1706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,64,0.1720
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,32,0.1709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,128,0.1738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,2,0.1737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,256,0.1725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,512,0.1758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,2048,0.3084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,4096,0.1991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,1024,0.1790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,32768,0.2374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,8192,0.2024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,16384,0.2144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,65536,0.2969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,131072,0.4021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,8,0.1732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,16,0.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,32,0.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,2,0.1717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,128,0.1739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,4,0.1743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,512,0.1768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,64,0.1729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,1024,0.1807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,2048,0.3126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,4096,0.1974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,256,0.1772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,16384,0.2157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,8192,0.2018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,65536,0.3050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,2,0.1762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,131072,0.4195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,4,0.1746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,8,0.1770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,32,0.1737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,16,0.1730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,64,0.1744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,256,0.1779
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,32768,0.2410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,1024,0.1827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,128,0.1784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,4096,0.2005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,512,0.1811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,16384,0.2199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,32768,0.2481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,8192,0.2078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,2048,0.3141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,2,0.1799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,65536,0.3189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,4,0.1810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,16,0.1810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,8,0.1807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,131072,0.4485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,32,0.1803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,64,0.1797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,128,0.1869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,512,0.1896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,2048,0.3247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,256,0.1883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,4096,0.2121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,16384,0.2317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,1024,0.1930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,32768,0.2682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,65536,0.3531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,131072,0.5008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,2,0.1943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,4,0.1942
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,16,0.1940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,8192,0.2218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,64,0.1951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,8,0.1933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,256,0.1959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,128,0.1966
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,1024,0.2036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,2048,0.3414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,512,0.1985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,4096,0.2279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,8192,0.2372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,16384,0.2592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,32,0.1951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,131072,0.5999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,2,0.2238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,32768,0.3054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,8,0.2236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,4,0.2252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,32,0.2249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,16,0.2247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,128,0.2264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,65536,0.4084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,64,0.2242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,512,0.2334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,2048,0.3884
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,1024,0.2400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,4096,0.2793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,256,0.2283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,8192,0.2954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,16384,0.3237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,32768,0.3919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,2,0.2712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,65536,0.5349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,4,0.2730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,8,0.2712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,32,0.2683
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,16,0.2728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,131072,0.8084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,64,0.2729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,512,0.2892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,1024,0.3053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,128,0.2731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,256,0.2796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,8192,0.3991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,4096,0.3688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,16384,0.4566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,32768,0.5744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,2,0.4118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,4,0.4117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,2048,0.5786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,131072,1.3592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,65536,0.8031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,16,0.4135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,8,0.4139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,32,0.4174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,128,0.4170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,64,0.4098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,512,0.4433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,2048,1.0110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,1024,0.4786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,256,0.4292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,16384,0.7652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,8192,0.6469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,2,0.7325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,32768,0.9890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,4,0.7204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,8,0.7263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,32,0.7353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,4096,0.5982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,64,0.7269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,65536,1.4822
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,128,0.7320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,16,0.7203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,256,0.7503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,512,0.7921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,2048,1.7850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,1024,0.8661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,4096,1.0636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,8192,1.1626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,2,0.1169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,16384,1.3687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,4,0.1166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,16,0.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,8,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,32,0.1184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,32768,1.7932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,128,0.1192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,512,0.1212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,256,0.1189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,1024,0.1207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,64,0.1180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,2048,0.2507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,16384,0.1510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,65536,0.2359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,8192,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,4,0.1264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,8,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,4096,0.1393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,32768,0.1803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,2,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,16,0.1254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,64,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,128,0.1271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,256,0.1260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,32,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,512,0.1265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,1024,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,2048,0.2606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,4096,0.1504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,8192,0.1531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,16384,0.1650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,65536,0.2503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,131072,0.3526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,2,0.1274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,32768,0.1863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,8,0.1289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,4,0.1260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,16,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,128,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,64,0.1275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,256,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,512,0.1302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,2048,0.2630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,32,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,16384,0.1646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,1024,0.1323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,32768,0.1915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,8192,0.1556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,131072,0.3584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,4096,0.1502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,4,0.1260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,65536,0.2518
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,2,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,32,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,16,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,128,0.1275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,256,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,512,0.1296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,1024,0.1315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,2048,0.2619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,64,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,8,0.1283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,4096,0.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,8192,0.1551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,32768,0.1916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,16384,0.1639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,131072,0.3701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,2,0.1303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,65536,0.2543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,4,0.1281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,16,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,8,0.1268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,32,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,64,0.1267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,128,0.1313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,512,0.1327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,2048,0.2696
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,1024,0.1363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,16384,0.1747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,4096,0.1564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,8192,0.1624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,32768,0.2058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,131072,0.4048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,4,0.1313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,2,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,8,0.1313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,65536,0.2752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,32,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,16,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,128,0.1331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,256,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,64,0.1309
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,1024,0.1425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,2048,0.2834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,8192,0.1778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,512,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,4096,0.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,16384,0.1933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,65536,0.3112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,32768,0.2275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,2,0.1428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,4,0.1418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,8,0.1433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,16,0.1432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,131072,0.4572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,64,0.1418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,128,0.1461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,32,0.1415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,256,0.1493
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,1024,0.1684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,2048,0.3193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,512,0.1556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,4096,0.2066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,16384,0.2378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,32768,0.2847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,65536,0.3889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,8192,0.2183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,2,0.1573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,131072,0.5741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,8,0.1545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,4,0.1546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,64,0.1539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,32,0.1560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,16,0.1568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,128,0.1648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,256,0.1692
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,2048,0.3780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,512,0.1796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,8192,0.2868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,1024,0.2025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,256,0.1311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,16384,0.3182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,4096,0.2700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,32768,0.3834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,2,0.1984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,131072,0.7958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,4,0.1972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,16,0.1984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,65536,0.5250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,64,0.1967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,128,0.2139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,256,0.2203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,8,0.1959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,32,0.1965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,1024,0.2833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,512,0.2379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,8192,0.4202
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,2048,0.6041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,4096,0.3895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,32768,0.5934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,65536,0.8235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,4,0.2892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,16384,0.4749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,2,0.2868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,8,0.2885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,16,0.2881
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,32,0.2884
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,64,0.2890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,1024,0.4627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,256,0.3334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,131072,1.3869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,128,0.3172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,2048,1.0937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,4096,0.6797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,512,0.3748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,16384,0.8422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,32768,1.0744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,8192,0.7332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,4,0.4823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,8,0.4833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,32,0.4816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,2,0.4841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,65536,1.5938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,128,0.5483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,64,0.4803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,256,0.5738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,512,0.6617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,16,0.4806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,1024,0.8401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,2048,1.9649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,8192,1.3476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,4096,1.2422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,4,0.0936
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,16384,1.5609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,8,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,2,0.0936
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,32,0.0941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,32768,1.9844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,128,0.0960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,16,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,512,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,64,0.0938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,256,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,1024,0.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,2048,0.2277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,8192,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,16384,0.1268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,65536,0.2132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,131072,0.3132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,2,0.1026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,4096,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,8,0.1025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,16,0.1038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,32,0.1022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,128,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,32768,0.1509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,4,0.1042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,512,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,64,0.1019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,2048,0.2346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,256,0.1024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,4096,0.1241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,16384,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,32768,0.1631
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,1024,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,131072,0.3229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,2,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,8192,0.1284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,8,0.1046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,16,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,4,0.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,64,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,256,0.1046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,512,0.1042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,65536,0.2224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,1024,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,32,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,2048,0.2391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,8192,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,128,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,16384,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,131072,0.3317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,4096,0.1267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,2,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,65536,0.2273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,8,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,16,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,32,0.1026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,4,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,64,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,256,0.1041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,128,0.1038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,32768,0.1673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,512,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,2048,0.2377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,1024,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,8192,0.1322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,65536,0.2335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,32768,0.1689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,16384,0.1435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,131072,0.3493
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,4,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,8,0.1061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,16,0.1057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,32,0.1067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,2,0.1048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,64,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,128,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,256,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,512,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,4096,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,2048,0.2416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,8192,0.1347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,16384,0.1467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,4096,0.1280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,65536,0.2481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,32768,0.1763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,2,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,4,0.1062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,8,0.1064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,131072,0.3773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,1024,0.1100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,64,0.1069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,256,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,16,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,32,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,512,0.1130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,128,0.1086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,1024,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,16384,0.1595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,2048,0.2512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,8192,0.1467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,65536,0.2789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,131072,0.4268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,4096,0.1398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,8,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,2,0.1125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,4,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,32768,0.1981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,128,0.1128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,256,0.1139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,512,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,32,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,16,0.1104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,2048,0.2667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,16384,0.1851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,8192,0.1647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,32768,0.2302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,65536,0.3326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,131072,0.5243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,64,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,4,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,1024,0.1239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,8,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,2,0.1241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,4096,0.1551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,64,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,128,0.1273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,16,0.1224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,32,0.1221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,1024,0.1506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,2048,0.3055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,256,0.1306
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,16384,0.2414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,512,0.1351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,32768,0.3068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,4096,0.1947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,8192,0.2100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,2,0.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,131072,0.7229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,8,0.1502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,32,0.1507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,4,0.1498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,65536,0.4512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,16,0.1506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,64,0.1494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,1024,0.2016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,128,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,512,0.1751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,4096,0.2794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,16384,0.3632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,2048,0.4901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,32768,0.4826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,8192,0.3051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,131072,1.2671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,2,0.2132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,65536,0.7103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,256,0.1639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,8,0.2120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,32,0.2116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,4,0.2115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,64,0.2114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,256,0.2331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,128,0.2265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,2048,0.8600
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,4096,0.4479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,16,0.2124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,16384,0.6110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,1024,0.3038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,8192,0.5004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,32768,0.8419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,4,0.3416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,16,0.3399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,8,0.3425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,512,0.2561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,65536,1.3434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,2,0.3409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,32,0.3399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,64,0.3385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,512,0.4384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,128,0.3727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,2048,1.5161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,8192,0.9025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,256,0.3903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,16384,1.1128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,2,0.0813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,1024,0.5353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,4,0.0821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,8,0.0825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,4096,0.7973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,16,0.0834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,32,0.0810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,64,0.0811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,128,0.0863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,32768,1.5455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,256,0.0846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,1024,0.0849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,2048,0.2172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,8192,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,4096,0.1043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,512,0.0858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,65536,0.2016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,32768,0.1428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,16384,0.1180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,16,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,8,0.0913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,4,0.0914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,64,0.0900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,32,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,128,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,512,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,1024,0.0938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,2048,0.2239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,4096,0.1122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,131072,0.3022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,256,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,2,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,8192,0.1184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,131072,0.3120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,16384,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,2,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,4,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,16,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,65536,0.2124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,64,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,128,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,32,0.0903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,32768,0.1517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,256,0.0915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,2048,0.2247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,4096,0.1117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,8192,0.1158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,16384,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,512,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,8,0.0921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,32768,0.1499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,65536,0.2141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,131072,0.3210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,4,0.0922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,8,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,16,0.0917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,32,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,128,0.0912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,64,0.0917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,512,0.0946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,1024,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,2048,0.2281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,4096,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,2,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,16384,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,256,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,65536,0.2204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,131072,0.3357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,2,0.0893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,4,0.0924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,32768,0.1566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,1024,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,8192,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,64,0.0893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,32,0.0900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,16,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,128,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,8,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,2048,0.2274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,4096,0.1148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,1024,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,256,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,8192,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,16384,0.1332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,512,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,65536,0.2332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,131072,0.3619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,8,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,32768,0.1653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,32,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,64,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,4,0.0934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,16,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,2,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,1024,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,2048,0.2312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,128,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,8192,0.1280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,512,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,4096,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,65536,0.2583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,16384,0.1407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,2,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,4,0.0965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,131072,0.4073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,32768,0.1774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,32,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,64,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,128,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,16,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,256,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,256,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,2048,0.2482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,1024,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,8192,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,16384,0.1667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,512,0.1061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,65536,0.3151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,32768,0.2118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,131072,0.5036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,4096,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,4,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,8,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,32,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,8,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,128,0.1057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,256,0.1085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,512,0.1139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,64,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,1024,0.1200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,2048,0.2662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,16,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,2,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,32768,0.2701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,16384,0.2054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,65536,0.4133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,4096,0.1569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,4,0.1239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,2,0.1251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,8,0.1234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,16,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,32,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,128,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,256,0.1336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,512,0.1406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,2048,0.4271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,1024,0.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,8192,0.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,4096,0.2160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,16384,0.3019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,64,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,8192,0.2467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,32768,0.4182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,131072,1.2042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,8,0.1688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,65536,0.6521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,16,0.1708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,2,0.1702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,32,0.1682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,4,0.1695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,64,0.1695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,256,0.1868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,1024,0.2343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,128,0.1769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,4096,0.3511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,512,0.2035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,2048,0.7635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,8192,0.4033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,16384,0.5172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,4,0.2650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,32768,0.7433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,2,0.2616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,8,0.2631
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,65536,1.2565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,16,0.2623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,64,0.2629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,32,0.2632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,128,0.2795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,256,0.2939
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,512,0.3255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,2048,1.3090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,1024,0.3867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,4096,0.5915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,16384,0.9080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,8192,0.6966
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,4,0.0731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,32768,1.3377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,2,0.0700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,16,0.0710
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,8,0.0691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,32,0.0709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,64,0.0715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,128,0.0741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,256,0.0713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,2048,0.2039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,512,0.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,8192,0.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,32768,0.1335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,4096,0.0882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,2,0.0780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,1024,0.0750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,65536,0.1895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,4,0.0808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,16,0.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,32,0.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,64,0.0809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,8,0.0793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,128,0.0783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,512,0.0785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,256,0.0810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,2048,0.0896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,4096,0.0980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,8192,0.1041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,16384,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,131072,0.2966
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,1024,0.0805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,32768,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,131072,0.3098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,4,0.0776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,8,0.0772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,16,0.0779
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,2,0.0787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,32,0.0793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,65536,0.2041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,256,0.0781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,512,0.0791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,64,0.0806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,2048,0.2139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,4096,0.1023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,8192,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,1024,0.0833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,16384,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,65536,0.2082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,131072,0.3142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,2,0.0788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,4,0.0783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,8,0.0786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,16,0.0820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,32768,0.1490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,64,0.0780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,128,0.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,32,0.0803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,256,0.0806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,128,0.0773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,1024,0.0856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,16384,0.1047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,512,0.0828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,8192,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,2048,0.2180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,16384,0.1231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,4096,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,65536,0.2144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,4,0.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,2,0.0786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,8,0.0764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,32,0.0764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,16,0.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,256,0.0805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,32768,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,1024,0.0851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,2048,0.2185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,131072,0.3283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,128,0.0797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,64,0.0789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,4096,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,8192,0.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,512,0.0805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,32768,0.1548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,16384,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,131072,0.3547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,65536,0.2257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,4,0.0824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,8,0.0788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,32,0.0795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,128,0.0823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,2,0.0808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,512,0.0870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,1024,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,256,0.0825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,16,0.0807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,4096,0.1120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,2048,0.2244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,16384,0.1349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,64,0.0805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,8192,0.1198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,65536,0.2522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,2,0.0898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,8,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,16,0.0886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,32768,0.1691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,4,0.0890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,131072,0.4005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,128,0.0922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,512,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,1024,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,2048,0.2389
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,32,0.0894
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,64,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,16384,0.1581
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,4096,0.1250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,256,0.0962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,131072,0.4953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,65536,0.3052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,2,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,4,0.0924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,8,0.0941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,16,0.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,8192,0.1368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,32768,0.2036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,32,0.0954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,512,0.1058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,256,0.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,128,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,1024,0.1166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,8192,0.1668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,4096,0.1509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,16384,0.1973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,32768,0.2640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,2048,0.2592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,2,0.1143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,64,0.0962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,4,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,16,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,131072,0.6792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,65536,0.4049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,32,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,64,0.1145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,256,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,128,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,8,0.1127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,4096,0.2050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,2048,0.4169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,8192,0.2332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,512,0.1321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,65536,0.6381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,32768,0.4076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,16384,0.2902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,1024,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,2,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,8,0.1531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,32,0.1518
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,16,0.1517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,131072,1.1952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,128,0.1636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,4,0.1526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,64,0.1530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,256,0.1695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,2048,0.7458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,4096,0.3348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,1024,0.2162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,32768,0.7263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,2,0.2263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,16384,0.4965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,8,0.2264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,65536,1.2527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,8192,0.3875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,512,0.1849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,16,0.2281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,32,0.2269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,4,0.2278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,128,0.2449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,512,0.2909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,2048,1.2708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,256,0.2600
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,1024,0.3494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,8192,0.6596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,4096,0.5545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,2,0.0659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,64,0.2280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,4,0.0645
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,32,0.0648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,64,0.0661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,32768,1.3054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,8,0.0661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,16,0.0628
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,128,0.0665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,256,0.0664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,16384,0.8711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,4096,0.0804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,1024,0.0685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,512,0.0680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,8192,0.0846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,65536,0.1832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,131072,0.2877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,32768,0.1207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,2048,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,8,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,2,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,32,0.0742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,4,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,16384,0.0984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,256,0.0744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,512,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,1024,0.0765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,2048,0.0844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,16,0.0748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,8192,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,4096,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,128,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,65536,0.1994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,32768,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,64,0.0743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,2,0.0730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,8,0.0777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,131072,0.3025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,16384,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,4,0.0766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,16,0.0747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,64,0.0744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,32,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,256,0.0774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,512,0.0777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,128,0.0742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,4096,0.0981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,1024,0.0777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,32768,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,65536,0.2028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,8192,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,2048,0.2093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,131072,0.3092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,16384,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,8,0.0752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,32,0.0743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,16,0.0747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,64,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,256,0.0763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,128,0.0744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,1024,0.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,2048,0.2118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,2,0.0765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,4,0.0759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,4096,0.0953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,32768,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,16384,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,512,0.0774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,8192,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,131072,0.3285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,2,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,4,0.0765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,32,0.0756
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,65536,0.2117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,64,0.0763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,128,0.0766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,256,0.0766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,16,0.0743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,1024,0.0807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,8,0.0732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,8192,0.1099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,16384,0.1248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,512,0.0785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,65536,0.2280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,2048,0.2152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,131072,0.3544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,4096,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,4,0.0787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,2,0.0767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,32,0.0763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,8,0.0756
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,32768,0.1542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,128,0.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,64,0.0775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,1024,0.0863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,512,0.0827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,256,0.0787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,16,0.0787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,4096,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,32768,0.1680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,16384,0.1312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,131072,0.4001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,65536,0.2508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,2048,0.2220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,4,0.0830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,16,0.0828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,8192,0.1183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,32,0.0836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,8,0.0838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,256,0.0867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,512,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,128,0.0850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,64,0.0828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,2,0.0844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,2048,0.2346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,4096,0.1228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,8192,0.1330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,1024,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,32768,0.2000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,16384,0.1538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,2,0.0899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,65536,0.3036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,131072,0.4943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,16,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,64,0.0890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,4,0.0886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,128,0.0926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,32,0.0872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,512,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,2048,0.2565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,1024,0.1125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,4096,0.1476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,16384,0.1941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,32768,0.2631
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,8,0.0870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,65536,0.4040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,256,0.0961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,8192,0.1649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,131072,0.6742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,2,0.1069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,16,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,8,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,64,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,128,0.1142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,512,0.1273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,4,0.1063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,1024,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,4096,0.2016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,2048,0.4151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,16384,0.2878
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,8192,0.2291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,65536,0.6340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,256,0.1180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,2,0.1465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,32,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,131072,1.1926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,4,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,8,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,16,0.1439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,64,0.1466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,128,0.1534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,512,0.1778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,32768,0.4045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,32,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,1024,0.2085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,2048,0.7371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,8192,0.3778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,16384,0.4870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,4096,0.3273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,4,0.2116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,65536,1.2151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,256,0.1622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,32768,0.7180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,16,0.2122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,32,0.2133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,64,0.2118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,8,0.2116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,128,0.2312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,256,0.2458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,512,0.2750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,1024,0.3340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,8192,0.6418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,2,0.2118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,2,0.0607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,16384,0.8536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,4,0.0623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,8,0.0622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,16,0.0602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,32768,1.2900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,2048,1.2573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,64,0.0622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,4096,0.5381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,32,0.0621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,128,0.0643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,1024,0.0662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,512,0.0622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,2048,0.0706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,8192,0.0827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,4096,0.0797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,32768,0.1205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,65536,0.1827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,131072,0.2760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,256,0.0643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,2,0.0724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,16,0.0712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,4,0.0703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,16384,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,64,0.0702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,128,0.0728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,512,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,32,0.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,256,0.0724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,2048,0.0795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,1024,0.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,8192,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,8,0.0705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,16384,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,4096,0.0866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,65536,0.1892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,2,0.0705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,8,0.0726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,16,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,32768,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,32,0.0705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,128,0.0705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,256,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,512,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,1024,0.0765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,2048,0.2043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,4,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,8192,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,64,0.0726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,131072,0.2946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,65536,0.1951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,131072,0.3088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,16384,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,4,0.0727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,2,0.0726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,8,0.0722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,16,0.0708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,4096,0.0867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,32768,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,256,0.0724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,512,0.0730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,32,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,2048,0.2060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,64,0.0726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,8192,0.0954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,128,0.0726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,32768,0.1431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,4096,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,1024,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,65536,0.2097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,16384,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,2,0.0727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,131072,0.3265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,8,0.0727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,16,0.0736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,128,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,32,0.0709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,64,0.0709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,512,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,4,0.0733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,2048,0.2098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,256,0.0742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,8192,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,32768,0.1521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,65536,0.2247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,1024,0.0783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,2,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,131072,0.3530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,4,0.0731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,16384,0.1199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,32,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,4096,0.0986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,64,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,256,0.0743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,128,0.0764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,1024,0.0825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,16,0.0768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,8,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,2048,0.2194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,16384,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,8192,0.1148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,4096,0.1064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,512,0.0788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,65536,0.2516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,2,0.0823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,4,0.0821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,131072,0.3964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,8,0.0804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,32,0.0806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,64,0.0824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,32768,0.1664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,256,0.0848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,16,0.0807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,512,0.0871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,4096,0.1239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,2048,0.2343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,128,0.0828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,8192,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,65536,0.3017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,16384,0.1534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,131072,0.4927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,32768,0.1989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,8,0.0866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,16,0.0845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,1024,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,32,0.0858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,64,0.0851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,4,0.0859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,512,0.0998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,256,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,128,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,4096,0.1459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,2,0.0851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,8192,0.1616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,2048,0.2546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,1024,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,65536,0.4029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,2,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,16384,0.1911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,4,0.1025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,8,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,16,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,128,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,131072,0.6726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,64,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,32,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,512,0.1254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,32768,0.2596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,256,0.1164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,2048,0.4100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,4096,0.1976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,1024,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,8192,0.2280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,2,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,65536,0.6317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,32768,0.4021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,4,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,16384,0.2841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,16,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,64,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,8,0.1403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,131072,1.1896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,512,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,256,0.1588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,1024,0.2061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,32,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,2048,0.7339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,4096,0.3183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,128,0.1507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,8192,0.3731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,2,0.2044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,4,0.2057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,65536,1.2326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,16384,0.4838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,32,0.2073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,32768,0.7133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,8,0.2040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,128,0.2243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,64,0.2054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,256,0.2381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,1024,0.3270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,512,0.2689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,4096,0.5287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,16,0.2057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,8192,0.6353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,2,0.0601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,16384,0.8462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,4,0.0602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,2048,1.2458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,8,0.0610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,32768,1.2759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,16,0.0599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,64,0.0599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,32,0.0605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,128,0.0601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,512,0.0623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,256,0.0619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,2048,0.0671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,1024,0.0643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,8192,0.0808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,16384,0.0912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,65536,0.1746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,2,0.0702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,4096,0.0777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,8,0.0724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,131072,0.2733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,32,0.0705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,64,0.0703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,16,0.0704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,4,0.0703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,1024,0.0724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,512,0.0704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,2048,0.2034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,4096,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,32768,0.1160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,16384,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,128,0.0731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,256,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,131072,0.2981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,8192,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,4,0.0705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,65536,0.1884
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,8,0.0705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,16,0.0705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,64,0.0704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,2,0.0702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,256,0.0724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,32768,0.1252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,128,0.0704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,512,0.0722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,4096,0.0867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,1024,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,2048,0.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,16384,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,32768,0.1306
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,32,0.0715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,131072,0.3050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,8192,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,4,0.0706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,16,0.0706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,8,0.0705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,64,0.0705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,32,0.0724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,2,0.0704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,256,0.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,512,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,1024,0.0744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,65536,0.1965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,4096,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,16384,0.1098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,8192,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,2048,0.2040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,131072,0.3230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,2,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,128,0.0724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,65536,0.2064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,16,0.0704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,32768,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,8,0.0726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,64,0.0724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,128,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,256,0.0704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,1024,0.0763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,512,0.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,32,0.0716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,8192,0.1043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,4,0.0722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,32768,0.1504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,65536,0.2206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,131072,0.3502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,2048,0.2080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,4,0.0728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,4096,0.0937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,2,0.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,8,0.0727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,32,0.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,16,0.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,64,0.0724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,128,0.0736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,1024,0.0804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,512,0.0785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,2048,0.2158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,256,0.0748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,8192,0.1146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,4096,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,16384,0.1286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,131072,0.3964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,65536,0.2491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,8,0.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,16384,0.1179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,2,0.0828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,32,0.0801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,16,0.0804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,64,0.0807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,4,0.0798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,32768,0.1647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,1024,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,128,0.0805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,256,0.0826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,2048,0.2345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,4096,0.1233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,16384,0.1513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,32768,0.1987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,8192,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,131072,0.4891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,2,0.0842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,512,0.0875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,4,0.0847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,65536,0.3008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,64,0.0842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,32,0.0847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,256,0.0905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,512,0.1002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,128,0.0885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,1024,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,4096,0.1452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,8192,0.1618
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,32768,0.2589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,2048,0.2531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,8,0.0835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,2,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,131072,0.6721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,4,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,8,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,16,0.0848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,16,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,16384,0.1940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,128,0.1089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,64,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,256,0.1142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,512,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,1024,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,32,0.1025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,4096,0.1975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,65536,0.4005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,16384,0.2821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,8192,0.2255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,65536,0.6288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,2,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,8,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,4,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,131072,1.1857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,32768,0.4010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,2048,0.4109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,32,0.1389
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,16,0.1391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,128,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,64,0.1399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,256,0.1565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,2048,0.7326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,8192,0.3720
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,16384,0.4823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,1024,0.2053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,32768,0.7110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,2,0.2035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,512,0.1734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,8,0.2020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,4,0.2035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,16,0.2045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,4096,0.3179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,32,0.2025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,65536,1.2360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,128,0.2218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,256,0.2358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,64,0.2050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,1024,0.3239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,4096,0.5260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,8192,0.6302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,512,0.2654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,16384,0.8433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,32768,1.2741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,8,0.2044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,2048,1.2427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,2,0.2093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,4,0.2267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,32,0.2194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,16,0.2206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,64,0.2059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,256,0.2080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,1024,0.2176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,128,0.2237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,512,0.2514
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,4096,0.2511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,2048,0.3631
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,8192,0.2511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,16384,0.2520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,131072,0.4433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,65536,0.3347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,32768,0.2698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,4,0.2416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,16,0.2286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,2,0.2330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,32,0.2162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,8,0.2346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,128,0.2285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,256,0.2165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,1024,0.2473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,512,0.2503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,2048,0.2474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,16384,0.2576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,64,0.2295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,4096,0.2507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,65536,0.3525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,32768,0.2777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,8192,0.2668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,4,0.1768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,8,0.1790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,131072,0.4706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,32,0.1791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,64,0.1751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,256,0.1845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,128,0.1851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,512,0.1901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,16,0.1799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,2,0.1791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,8192,0.2084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,1024,0.1977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,2048,0.3244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,32768,0.2440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,16384,0.2206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,4096,0.2039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,65536,0.3059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,2,0.1736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,16,0.1762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,64,0.1742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,32,0.1764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,8,0.1761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,4,0.1791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,256,0.1852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,1024,0.1928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,512,0.1909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,128,0.1831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,4096,0.2064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,8192,0.2143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,32768,0.2468
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,16384,0.2226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,131072,0.4269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,65536,0.3141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,2,0.1793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,2048,0.3206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,8,0.1790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,16,0.1773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,4,0.1829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,128,0.1850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,32,0.1783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,256,0.1881
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,64,0.1777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,1024,0.1902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,2048,0.3265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,512,0.1911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,4096,0.2118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,8192,0.2168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,16384,0.2301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,32768,0.2575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,65536,0.3271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,2,0.1759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,16,0.1775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,8,0.1770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,131072,0.4601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,32,0.1754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,64,0.1765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,128,0.1789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,4,0.1770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,512,0.1857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,256,0.1852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,2048,0.3273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,8192,0.2211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,16384,0.2349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,4096,0.2135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,1024,0.1882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,65536,0.3579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,2,0.1808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,131072,0.5028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,32768,0.2726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,8,0.1807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,4,0.1812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,32,0.1802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,16,0.1813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,64,0.1831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,256,0.1876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,1024,0.2004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,128,0.1855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,4096,0.2347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,512,0.1940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,8192,0.2442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,65536,0.4158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,16384,0.2661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,2,0.2089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,131072,0.6040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,32768,0.3148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,4,0.2047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,8,0.2068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,16,0.2071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,32,0.2076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,256,0.2189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,128,0.2134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,64,0.2077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,1024,0.2498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,2048,0.4119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,4096,0.3022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,8192,0.3192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,512,0.2268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,16384,0.3478
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,32768,0.4161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,65536,0.5593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,2,0.2667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,8,0.2665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,4,0.2661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,16,0.2676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,32,0.2675
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,131072,0.8308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,64,0.2680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,128,0.2800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,256,0.2866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,512,0.3070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,1024,0.3455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,2048,0.6517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,4096,0.4337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,16384,0.5238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,8192,0.4657
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,65536,0.8717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,32768,0.6452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,131072,1.4270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,2,0.3858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,16,0.3851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,4,0.3834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,8,0.3846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,32,0.3864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,64,0.3878
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,256,0.4212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,512,0.4585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,128,0.4082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,1024,0.5311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,2048,1.1162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,4096,0.7020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,8192,0.7538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,16384,0.8648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,32768,1.0959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,4,0.6827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,8,0.6769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,2048,0.3483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,65536,1.6224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,16,0.6790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,2,0.6809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,32,0.6770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,64,0.6825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,128,0.7101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,256,0.7408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,1024,0.9561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,4096,1.2742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,8192,1.3776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,512,0.8052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,2,0.1611
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,2048,1.9980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,32768,2.0283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,4,0.1632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,16384,1.5924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,16,0.1595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,8,0.1716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,64,0.1595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,128,0.1583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,1024,0.1660
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,512,0.1697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,32,0.1685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,2048,0.3042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,256,0.1674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,4096,0.1967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,8192,0.1870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,16384,0.1999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,32768,0.2291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,2,0.1733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,8,0.1783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,65536,0.2786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,32,0.1647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,4,0.1687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,128,0.1747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,16,0.1673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,256,0.1808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,512,0.1670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,2048,0.3144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,4096,0.1975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,1024,0.1726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,16384,0.2214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,64,0.1726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,8192,0.1960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,2,0.1665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,65536,0.2940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,8,0.1731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,131072,0.3997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,32768,0.2319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,128,0.1738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,4,0.1831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,256,0.1793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,512,0.1812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,1024,0.1739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,64,0.1659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,32,0.1645
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,8192,0.2109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,2048,0.3130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,32768,0.2351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,4096,0.2122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,16384,0.2276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,2,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,131072,0.4053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,65536,0.2971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,4,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,16,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,8,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,64,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,128,0.1455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,32,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,256,0.1481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,2048,0.2853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,1024,0.1586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,512,0.1522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,8192,0.1805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,16384,0.1934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,4096,0.1746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,131072,0.3985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,2,0.1425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,65536,0.2791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,32768,0.2165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,8,0.1447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,4,0.1438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,16,0.1455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,128,0.1486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,32,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,512,0.1566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,64,0.1432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,1024,0.1630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,2048,0.3028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,256,0.1547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,4096,0.1857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,32768,0.2324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,8192,0.1967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,65536,0.3058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,16384,0.2040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,2,0.1383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,131072,0.4374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,8,0.1366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,16,0.1371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,64,0.1373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,128,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,4,0.1370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,32,0.1387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,256,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,512,0.1489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,4096,0.2000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,2048,0.3121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,32768,0.2565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,16384,0.2203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,65536,0.3411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,2,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,4,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,131072,0.4874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,8,0.1436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,16,0.1424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,32,0.1436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,1024,0.1616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,64,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,256,0.1557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,128,0.1500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,1024,0.1962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,8192,0.2713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,512,0.1702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,2048,0.3742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,4096,0.2600
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,65536,0.4366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,131072,0.6287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,2,0.1679
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,4,0.1671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,32768,0.3360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,16,0.1668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,8,0.1667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,64,0.1660
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,32,0.1676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,128,0.1816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,256,0.1917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,1024,0.2654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,512,0.2134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,16384,0.2885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,4096,0.3769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,8192,0.3919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,16384,0.4199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,65536,0.6299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,2048,0.4762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,32768,0.4801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,2,0.2217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,8,0.2205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,131072,0.9001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,16,0.2199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,4,0.2221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,64,0.2213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,32,0.2208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,128,0.2436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,256,0.2633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,1024,0.4001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,512,0.3083
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,2048,0.8058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,16384,0.6779
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,4096,0.5911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,8192,0.6193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,32768,0.7965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,65536,1.0206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,4,0.3257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,131072,1.5765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,2,0.3269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,8,0.3258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,32,0.3245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,16,0.3265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,64,0.3244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,128,0.3662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,1024,0.6799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,512,0.5032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,256,0.4126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,8192,0.2068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,2048,1.4804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,4096,1.0638
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,16384,1.2142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,8192,1.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,8,0.5698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,16,0.5742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,65536,1.9529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,32768,1.4433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,2,0.5634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,128,0.6576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,64,0.5627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,32,0.5605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,4,0.5694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,512,0.9277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,256,0.7362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,2048,2.7329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,4096,2.0054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,1024,1.2855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,8192,2.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,2,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,32768,2.7303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,16384,2.3001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,8,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,32,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,16,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,128,0.1414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,4,0.1359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,64,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,256,0.1341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,4096,0.1556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,8192,0.1662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,2048,0.2640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,32768,0.1920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,512,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,65536,0.2513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,16384,0.1685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,4,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,2,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,1024,0.1394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,32,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,8,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,16,0.1435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,64,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,512,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,128,0.1394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,2048,0.2733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,256,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,4096,0.1638
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,8192,0.1674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,16384,0.1803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,32768,0.2008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,65536,0.2620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,1024,0.1435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,2,0.1464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,131072,0.3707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,8,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,16,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,64,0.1387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,4,0.1376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,256,0.1497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,512,0.1486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,32,0.1385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,128,0.1454
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,1024,0.1474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,4096,0.1671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,16384,0.1861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,2048,0.2793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,65536,0.2750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,131072,0.3751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,2,0.1341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,4,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,8,0.1330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,32768,0.2076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,8192,0.1796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,16,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,128,0.1397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,32,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,256,0.1385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,512,0.1424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,2048,0.2833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,1024,0.1544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,8192,0.1721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,64,0.1333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,4096,0.1701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,32768,0.2129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,16384,0.1831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,2,0.1247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,131072,0.3950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,4,0.1233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,16,0.1263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,8,0.1263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,65536,0.2731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,64,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,128,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,512,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,2048,0.2745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,1024,0.1413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,8192,0.1655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,16384,0.1824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,32,0.1234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,32768,0.2089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,131072,0.4097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,4096,0.1601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,4,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,2,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,16,0.1150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,32,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,64,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,8,0.1146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,65536,0.2794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,512,0.1322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,128,0.1247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,1024,0.1399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,2048,0.2798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,4096,0.1630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,16384,0.1862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,256,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,32768,0.2224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,65536,0.3049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,131072,0.4551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,8192,0.1729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,4,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,16,0.1185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,32,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,2,0.1190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,64,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,128,0.1251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,8,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,1024,0.1461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,2048,0.2989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,512,0.1333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,8192,0.1962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,256,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,16384,0.2175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,65536,0.3652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,32768,0.2637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,131072,0.5558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,2,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,4096,0.1868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,8,0.1385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,4,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,16,0.1377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,32,0.1397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,64,0.1374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,256,0.1544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,512,0.1673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,128,0.1466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,2048,0.3733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,256,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,1024,0.1945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,4096,0.2651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,16384,0.3108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,65536,0.5219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,8192,0.2785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,131072,0.7956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,4,0.1789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,16,0.1810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,2,0.1809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,32768,0.3797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,32,0.1803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,64,0.1799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,8,0.1811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,512,0.2321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,128,0.1944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,1024,0.2817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,8192,0.4351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,2048,0.6176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,256,0.2059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,32768,0.6108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,4096,0.4033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,65536,0.8381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,2,0.2589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,16384,0.4948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,4,0.2591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,8,0.2597
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,32,0.2584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,128,0.2810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,64,0.2587
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,16,0.2594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,1024,0.4530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,256,0.3083
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,2048,1.0937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,131072,1.3924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,4096,0.6849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,512,0.3560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,16384,0.8482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,8,0.4255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,32768,1.0781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,65536,1.5829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,2,0.4199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,4,0.4249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,16,0.4248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,8192,0.7359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,64,0.4242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,32,0.4225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,256,0.5189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,2048,1.9600
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,1024,0.8077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,512,0.6198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,4096,1.2494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,16384,1.5636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,8192,1.3553
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,128,0.4673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,32768,1.9979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,2,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,8,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,4,0.1086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,16,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,64,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,1024,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,256,0.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,512,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,8192,0.1351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,32,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,2048,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,128,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,4096,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,16384,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,131072,0.3262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,2,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,4,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,32768,0.1704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,8,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,65536,0.2308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,128,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,256,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,16,0.1241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,32,0.1198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,512,0.1199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,64,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,2048,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,4096,0.1413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,8192,0.1417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,1024,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,65536,0.2433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,4,0.1212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,32768,0.1803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,8,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,16,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,64,0.1219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,2,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,128,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,131072,0.3456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,256,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,16384,0.1572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,512,0.1211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,4096,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,32,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,1024,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,16384,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,65536,0.2497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,2048,0.2545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,8192,0.1470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,8,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,2,0.1172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,32,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,32768,0.1938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,64,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,16,0.1142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,256,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,128,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,4,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,1024,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,4096,0.1404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,8192,0.1504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,16384,0.1647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,2048,0.2542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,512,0.1168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,32768,0.1934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,65536,0.2626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,4,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,2,0.1161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,16,0.1158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,32,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,8,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,131072,0.3715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,256,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,128,0.1161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,1024,0.1334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,512,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,4096,0.1523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,2048,0.2678
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,8192,0.1625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,64,0.1206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,32768,0.2039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,131072,0.4020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,16384,0.1731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,65536,0.2746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,4,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,2,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,8,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,32,0.1002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,64,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,128,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,16,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,1024,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,2048,0.2592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,256,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,4096,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,16384,0.1694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,32768,0.2062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,8192,0.1564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,131072,0.4386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,8,0.0986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,16,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,32,0.0998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,512,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,65536,0.2881
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,2,0.1001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,128,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,64,0.1024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,256,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,2048,0.2762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,512,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,8192,0.1735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,4096,0.1657
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,32768,0.2416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,16384,0.1953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,65536,0.3446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,2,0.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,4,0.1128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,131072,0.5325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,16,0.1139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,32,0.1148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,1024,0.1315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,128,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,64,0.1128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,256,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,1024,0.1560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,512,0.1405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,4096,0.2098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,2048,0.3173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,16384,0.2560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,8,0.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,32768,0.3231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,8192,0.2235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,131072,0.7340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,65536,0.4639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,4,0.1531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,8,0.1577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,2,0.1517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,32,0.1556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,16,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,64,0.1581
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,256,0.1778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,128,0.1674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,512,0.1960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,2048,0.5263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,4096,0.3146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,16384,0.4008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,1024,0.2268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,32768,0.5168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,4,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,8192,0.3445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,131072,1.3085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,8,0.2232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,65536,0.7480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,2,0.2238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,4,0.2230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,16,0.2226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,64,0.2240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,32,0.2228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,256,0.2608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,128,0.2419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,2048,0.9368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,4096,0.5267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,512,0.2937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,8192,0.5792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,32768,0.9238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,2,0.3616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,8,0.3597
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,65536,1.4277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,16384,0.6880
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,4,0.3611
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,16,0.3601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,1024,0.3516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,32,0.3620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,128,0.3881
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,512,0.4788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,1024,0.6075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,64,0.3582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,2048,1.6239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,256,0.4201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,16384,1.2260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,2,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,4,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,32768,1.6507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,16,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,4096,0.9138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,64,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,128,0.1041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,8192,1.0107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,8,0.1048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,32,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,256,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,512,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,2048,0.2425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,8192,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,16384,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,4096,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,1024,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,2,0.1142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,65536,0.2197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,4,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,32768,0.1593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,131072,0.3226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,16,0.1086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,256,0.1128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,128,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,512,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,1024,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,2048,0.1165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,4096,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,64,0.1140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,16384,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,8,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,65536,0.2245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,32768,0.1666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,2,0.1185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,32,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,8,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,16,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,32,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,64,0.1178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,128,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,131072,0.3334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,256,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,512,0.1127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,2048,0.2532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,1024,0.1149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,8192,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,4,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,8192,0.1335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,4096,0.1308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,131072,0.3503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,2,0.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,16384,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,4,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,32768,0.1701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,16,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,32,0.1082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,65536,0.2431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,8,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,128,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,1024,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,2048,0.2423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,256,0.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,512,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,16384,0.1471
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,32768,0.1800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,8192,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,65536,0.2471
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,4096,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,64,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,131072,0.3608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,16,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,8,0.1086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,32,0.1088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,4,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,2,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,64,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,128,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,2048,0.2496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,4096,0.1414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,1024,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,8192,0.1528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,32768,0.1958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,16384,0.1640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,512,0.1108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,2,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,65536,0.2652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,131072,0.3938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,256,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,16,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,8,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,128,0.1017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,64,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,512,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,256,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,1024,0.1108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,4,0.1005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,4096,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,16384,0.1657
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,32,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,2048,0.2558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,65536,0.2870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,131072,0.4329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,2,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,8,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,8192,0.1537
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,32768,0.2024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,32,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,64,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,16,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,128,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,4,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,1024,0.1261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,256,0.1067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,2048,0.2712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,16384,0.1889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,32768,0.2352
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,65536,0.3370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,4096,0.1594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,2,0.1061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,512,0.1090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,131072,0.5281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,8192,0.1706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,32,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,8,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,128,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,64,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,4,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,256,0.1228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,512,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,2048,0.3145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,4096,0.2044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,8192,0.2207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,32768,0.3177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,16384,0.2532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,16,0.1069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,2,0.1406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,4,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,1024,0.1523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,131072,0.7313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,8,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,64,0.1461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,32,0.1395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,128,0.1601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,256,0.1705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,16,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,1024,0.2190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,2048,0.5181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,65536,0.4601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,4096,0.3060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,512,0.1885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,16384,0.3900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,8192,0.3358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,65536,0.7391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,4,0.2084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,2,0.2068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,32768,0.5090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,16,0.2101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,8,0.2080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,32,0.2117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,128,0.2295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,64,0.2155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,1024,0.3393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,131072,1.2888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,2048,0.9260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,4096,0.5156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,512,0.2828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,16384,0.6771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,256,0.2473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,4,0.3376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,2,0.3396
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,65536,1.4042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,32768,0.9083
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,8192,0.5649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,32,0.3364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,8,0.3373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,64,0.3387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,128,0.3680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,512,0.4575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,2048,1.5946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,16,0.3387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,8192,0.9818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,16384,1.2012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,4096,0.8821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,2,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,4,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,8,0.0984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,1024,0.5678
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,256,0.3981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,32768,1.6270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,32,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,64,0.0999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,128,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,256,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,512,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,8192,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,1024,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,2048,0.2258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,16384,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,32768,0.1559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,65536,0.2090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,16,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,131072,0.3160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,2,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,4,0.1041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,4096,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,16,0.1090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,8,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,256,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,64,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,1024,0.1063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,128,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,512,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,8192,0.1250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,16384,0.1359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,32,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,2048,0.2429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,131072,0.3231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,2,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,32768,0.1596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,4096,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,65536,0.2248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,4,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,32,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,128,0.1057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,64,0.1058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,256,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,512,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,2048,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,4096,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,8,0.1042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,8192,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,32768,0.1708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,16,0.1117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,1024,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,131072,0.3403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,4,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,8,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,16,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,32,0.1019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,64,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,2,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,512,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,16384,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,256,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,1024,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,65536,0.2252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,4096,0.1203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,2048,0.2382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,8192,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,65536,0.2418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,131072,0.3598
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,128,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,32768,0.1768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,16384,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,4,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,8,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,2,0.1017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,16,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,512,0.1064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,256,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,64,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,32,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,4096,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,8192,0.1374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,128,0.1086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,16384,0.1592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,65536,0.2613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,131072,0.3898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,2,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,32768,0.1886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,1024,0.1139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,16,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,2048,0.2419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,64,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,8,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,256,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,128,0.0955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,32,0.0938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,2048,0.2494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,4096,0.1394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,512,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,32768,0.1979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,4,0.0956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,8192,0.1480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,16384,0.1627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,1024,0.1062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,131072,0.4303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,2,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,65536,0.2823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,4,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,32,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,128,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,256,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,8,0.0926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,1024,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,512,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,64,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,8192,0.1664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,4096,0.1575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,16,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,16384,0.1871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,65536,0.3344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,32768,0.2349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,4,0.1045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,8,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,16,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,131072,0.5257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,64,0.1041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,128,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,256,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,2048,0.2691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,2,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,1024,0.1505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,4096,0.2028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,8192,0.2182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,32768,0.3152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,512,0.1342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,65536,0.4584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,16384,0.2484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,4,0.1359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,32,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,2,0.1356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,16,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,32,0.1365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,131072,0.7314
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,2048,0.3111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,512,0.1837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,8,0.1341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,64,0.1414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,128,0.1520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,256,0.1661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,4096,0.3036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,1024,0.2163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,32768,0.5060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,8192,0.3291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,2048,0.5154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,2,0.1991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,65536,0.7337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,4,0.2017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,16,0.2022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,8,0.2027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,32,0.2066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,64,0.2094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,256,0.2437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,512,0.2760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,131072,1.2879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,1024,0.3332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,2048,0.9218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,4096,0.5088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,8192,0.5590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,16384,0.3875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,16384,0.6711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,32768,0.9009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,2,0.3341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,8,0.3287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,128,0.2256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,32,0.3319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,65536,1.4230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,64,0.3341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,4,0.3283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,256,0.3852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,16,0.3305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,512,0.4421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,128,0.3624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,1024,0.5598
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,4096,0.8699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,8192,0.9738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,2048,1.5830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,8,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,32768,1.6204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,2,0.0933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,4,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,16,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,32,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,64,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,512,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,16384,1.1909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,128,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,256,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,8192,0.1147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,2048,0.1056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,4096,0.1060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,65536,0.2046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,32768,0.1486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,16384,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,1024,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,131072,0.3058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,2,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,4,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,64,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,8,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,512,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,128,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,1024,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,2048,0.2311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,4096,0.1148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,32,0.0974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,16,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,16384,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,256,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,32768,0.1617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,8192,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,4,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,131072,0.3222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,16,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,32,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,2,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,128,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,65536,0.2147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,256,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,8,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,512,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,64,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,1024,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,2048,0.2322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,32768,0.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,8192,0.1240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,65536,0.2218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,131072,0.3409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,16384,0.1326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,8,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,4096,0.1162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,16,0.1002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,2,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,64,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,4,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,32,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,128,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,1024,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,4096,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,8192,0.1200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,16384,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,256,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,65536,0.2388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,2048,0.2380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,131072,0.3568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,32768,0.1682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,4,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,16,0.1022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,32,0.1045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,512,0.1000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,256,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,8,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,512,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,128,0.1007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,1024,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,2048,0.2377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,8192,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,4096,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,32768,0.1890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,65536,0.2609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,2,0.0978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,131072,0.3864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,16384,0.1532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,8,0.0912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,4,0.0906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,16,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,32,0.0913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,64,0.0913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,64,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,256,0.0936
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,1024,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,2,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,4096,0.1356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,8192,0.1445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,2048,0.2448
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,512,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,65536,0.2785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,16384,0.1594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,4,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,128,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,8,0.0906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,2,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,131072,0.4260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,64,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,128,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,16,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,32768,0.1959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,512,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,256,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,1024,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,8192,0.1645
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,4096,0.1554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,16384,0.1852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,65536,0.3329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,32768,0.2327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,131072,0.5235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,2,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,8,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,4,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,32,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,32,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,64,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,128,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,256,0.1149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,1024,0.1488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,512,0.1321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,4096,0.2021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,16,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,8192,0.2172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,2048,0.3117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,32768,0.3147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,2048,0.2664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,131072,0.7280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,2,0.1332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,4,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,8,0.1327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,65536,0.4561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,16384,0.2489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,32,0.1345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,16,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,128,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,64,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,1024,0.2149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,2048,0.5110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,512,0.1820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,4096,0.3015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,16384,0.3882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,65536,0.7345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,32768,0.5055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,8192,0.3282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,2,0.1917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,4,0.1959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,131072,1.2903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,256,0.1654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,8,0.1976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,64,0.2055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,128,0.2219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,32,0.1995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,16,0.1976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,512,0.2725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,2048,0.9162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,4096,0.5054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,16384,0.6677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,8192,0.5551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,256,0.2407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,32768,0.9002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,1024,0.3299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,8,0.3275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,65536,1.3930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,2,0.3241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,4,0.3223
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,32,0.3272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,64,0.3305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,128,0.3563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,256,0.3843
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,16,0.3220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,2048,1.5778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,4096,0.8667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,1024,0.5510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,512,0.4376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,8192,0.9710
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,32768,1.6153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,16384,1.1824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,4,0.0876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,2,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,16,0.0887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,32,0.0872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,8,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,512,0.0900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,64,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,1024,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,2048,0.0962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,4096,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,16384,0.1188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,128,0.0893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,8192,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,65536,0.2091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,131072,0.3057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,2,0.0974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,8,0.0933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,256,0.0917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,32768,0.1477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,16,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,4,0.0937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,64,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,32,0.0982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,128,0.0979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,2048,0.1019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,256,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,4096,0.1141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,16384,0.1300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,512,0.0946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,1024,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,8192,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,32768,0.1517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,2,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,4,0.1028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,16,0.0986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,131072,0.3140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,32,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,65536,0.2108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,256,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,64,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,128,0.0974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,8,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,512,0.0974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,2048,0.2321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,4096,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,16384,0.1295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,65536,0.2169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,1024,0.1018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,32768,0.1570
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,8192,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,4,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,2,0.0954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,131072,0.3333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,8,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,32,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,128,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,64,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,256,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,16,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,4096,0.1170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,8192,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,1024,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,32768,0.1592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,16384,0.1315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,2048,0.2296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,2,0.1008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,512,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,65536,0.2346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,16,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,32,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,64,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,8,0.0962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,128,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,1024,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,131072,0.3537
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,2048,0.2402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,4,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,512,0.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,16384,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,32768,0.1851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,4096,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,256,0.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,2,0.0894
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,8192,0.1269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,131072,0.3868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,4,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,8,0.0890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,32,0.0891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,128,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,64,0.0880
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,512,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,256,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,65536,0.2569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,2048,0.2416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,8192,0.1418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,4096,0.1332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,32768,0.1943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,16,0.0901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,1024,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,65536,0.2760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,4,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,8,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,16,0.0899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,32,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,64,0.0904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,128,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,256,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,2,0.0893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,1024,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,16384,0.1578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,2048,0.2651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,4096,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,16384,0.1828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,8192,0.1647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,32768,0.2326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,131072,0.5205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,131072,0.4247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,2,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,8,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,16,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,65536,0.3312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,32,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,128,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,4,0.1007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,512,0.1288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,256,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,64,0.1002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,512,0.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,8192,0.2135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,1024,0.1469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,32768,0.3117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,65536,0.4527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,4096,0.1981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,131072,0.7258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,2048,0.3081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,4,0.1313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,8,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,16384,0.2463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,32,0.1303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,64,0.1332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,128,0.1449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,16,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,512,0.1830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,2,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,2048,0.5111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,1024,0.2128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,256,0.1629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,32768,0.5026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,8192,0.3255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,16384,0.3845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,65536,0.7320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,2,0.1885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,4,0.1890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,32,0.1936
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,8,0.1915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,131072,1.2872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,4096,0.2986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,16,0.1923
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,128,0.2201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,64,0.1995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,256,0.2343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,1024,0.3257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,4096,0.5018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,8192,0.5504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,512,0.2662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,16384,0.6627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,2,0.3173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,2048,0.9123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,4,0.3155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,32768,0.8914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,8,0.3186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,16,0.3203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,64,0.3226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,65536,1.4109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,128,0.3473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,256,0.3721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,512,0.4313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,2048,1.5723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,32,0.3205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,8192,0.9590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,4096,0.8609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,4,0.2183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,16384,1.1729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,8,0.2192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,32768,1.5990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,2,0.2196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,32,0.2274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,1024,0.5403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,16,0.2169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,128,0.2158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,256,0.2217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,512,0.2238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,1024,0.2355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,2048,0.2184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,64,0.2169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,16384,0.2623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,8192,0.2484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,32768,0.2688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,4096,0.2650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,2,0.2221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,131072,0.4533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,16,0.2180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,65536,0.3407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,4,0.2318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,32,0.2200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,64,0.2462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,256,0.2344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,8,0.2263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,128,0.2192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,2048,0.2384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,512,0.2202
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,4096,0.2400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,8192,0.2764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,16384,0.2704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,32768,0.2909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,1024,0.2310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,2,0.1776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,65536,0.3441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,8,0.1823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,32,0.1790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,4,0.1763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,16,0.1793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,64,0.1780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,512,0.1852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,131072,0.4554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,1024,0.1858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,128,0.1780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,4096,0.2033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,256,0.1786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,16384,0.2214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,2048,0.3114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,8192,0.2080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,131072,0.4088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,4,0.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,32768,0.2424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,8,0.1786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,2,0.1763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,65536,0.3051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,64,0.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,32,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,128,0.1751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,256,0.1800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,2048,0.3141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,4096,0.2001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,16,0.1735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,512,0.1848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,16384,0.2180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,32768,0.2425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,1024,0.1851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,131072,0.4198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,65536,0.3074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,2,0.1754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,8,0.1735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,32,0.1777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,64,0.1748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,8192,0.2055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,128,0.1797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,256,0.1798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,16,0.1793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,512,0.1800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,1024,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,2048,0.3166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,4,0.1804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,8192,0.2118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,16384,0.2221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,32768,0.2514
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,2,0.1720
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,65536,0.3178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,8,0.1713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,4,0.1719
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,16,0.1722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,131072,0.4525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,64,0.1715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,128,0.1783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,32,0.1731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,256,0.1774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,512,0.1803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,4096,0.2040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,8192,0.2097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,2048,0.3189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,16384,0.2246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,4096,0.2021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,1024,0.1809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,2,0.1787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,4,0.1805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,32768,0.2602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,16,0.1810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,8,0.1792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,131072,0.4932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,65536,0.3446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,128,0.1818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,512,0.1862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,32,0.1773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,256,0.1810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,64,0.1776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,4096,0.2135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,2048,0.3267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,8192,0.2240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,1024,0.1881
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,131072,0.5828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,65536,0.3942
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,32768,0.2905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,16384,0.2444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,4,0.2091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,8,0.2093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,16,0.2080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,2,0.2075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,32,0.2040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,512,0.2159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,64,0.2080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,256,0.2117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,2048,0.3697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,1024,0.2263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,128,0.2089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,4096,0.2629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,32768,0.3757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,8192,0.2773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,65536,0.5200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,4,0.2677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,16384,0.3074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,2,0.2705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,131072,0.7894
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,32,0.2667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,8,0.2689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,256,0.2774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,128,0.2716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,64,0.2691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,512,0.2865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,16,0.2680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,1024,0.3065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,8192,0.3955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,2048,0.5823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,16384,0.4545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,4096,0.3677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,32768,0.5744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,2,0.4015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,4,0.3984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,65536,0.8057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,16,0.4000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,8,0.3990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,64,0.4008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,256,0.4176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,131072,1.3599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,512,0.4328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,1024,0.4694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,128,0.4049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,2048,1.0021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,32,0.3983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,8192,0.6396
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,4096,0.5871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,2,0.7160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,32768,0.9837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,16384,0.7521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,8,0.7135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,16,0.7184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,65536,1.4928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,64,0.7146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,4,0.7137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,32,0.7150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,128,0.7300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,256,0.7445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,4096,1.0544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,2048,1.7758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,1024,0.8466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,8192,1.1568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,512,0.7761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,2,0.1654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,8,0.1582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,32768,1.8061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,16,0.1655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,4,0.1627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,16384,1.3717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,64,0.1708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,128,0.1674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,512,0.1621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,256,0.1623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,32,0.1662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,2048,0.2981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,8192,0.1915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,4096,0.1957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,16384,0.1925
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,65536,0.2785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,2,0.1701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,131072,0.3869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,8,0.1793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,4,0.1707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,16,0.1640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,32,0.1693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,1024,0.1664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,128,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,256,0.1786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,1024,0.1742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,2048,0.3029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,512,0.1725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,64,0.1697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,8192,0.1988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,16384,0.2165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,32768,0.2264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,32768,0.2279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,2,0.1743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,4096,0.1927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,4,0.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,65536,0.2892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,8,0.1797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,64,0.1725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,32,0.1625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,128,0.1762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,256,0.1743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,16,0.1670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,1024,0.1852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,4096,0.1875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,131072,0.3946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,8192,0.2025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,16384,0.2110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,2048,0.3053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,65536,0.3063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,2,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,131072,0.3957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,32768,0.2312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,4,0.1399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,512,0.1695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,32,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,16,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,256,0.1417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,512,0.1438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,8,0.1436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,128,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,1024,0.1444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,4096,0.1669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,16384,0.1802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,8192,0.1679
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,64,0.1417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,131072,0.3839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,32768,0.2096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,2048,0.2813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,4,0.1452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,8,0.1411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,2,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,16,0.1447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,128,0.1472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,65536,0.2684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,512,0.1480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,256,0.1455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,1024,0.1502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,32,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,2048,0.2879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,64,0.1459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,32768,0.2187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,4096,0.1698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,8192,0.1808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,65536,0.2908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,2,0.1370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,16384,0.1886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,4,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,8,0.1341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,64,0.1346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,128,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,16,0.1345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,256,0.1384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,131072,0.4188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,1024,0.1452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,32,0.1355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,4096,0.1767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,8192,0.1809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,512,0.1403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,2048,0.2871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,131072,0.4631
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,65536,0.3132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,4,0.1373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,16384,0.1976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,2,0.1390
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,32768,0.2322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,32,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,128,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,256,0.1458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,8,0.1391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,64,0.1372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,1024,0.1633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,16,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,512,0.1522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,4096,0.2041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,8192,0.2149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,65536,0.3835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,16384,0.2350
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,131072,0.5738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,2,0.1604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,4,0.1603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,8,0.1613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,32768,0.2819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,2048,0.3169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,32,0.1595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,128,0.1694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,16,0.1622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,256,0.1735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,2048,0.3847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,1024,0.2096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,4096,0.2750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,512,0.1856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,32768,0.3899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,64,0.1613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,16384,0.3240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,2,0.2115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,4,0.2113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,131072,0.8028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,8,0.2100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,65536,0.5322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,8192,0.2937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,32,0.2112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,16,0.2100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,64,0.2111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,128,0.2265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,512,0.2528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,1024,0.2961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,4096,0.4063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,256,0.2336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,16384,0.4919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,32768,0.6092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,2,0.3070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,65536,0.8375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,8192,0.4335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,131072,1.3949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,4,0.3059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,8,0.3060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,16,0.3069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,2048,0.6200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,512,0.3967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,64,0.3066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,128,0.3367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,256,0.3529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,2048,1.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,1024,0.4830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,32,0.3064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,8192,0.7552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,4096,0.7020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,65536,1.5936
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,8,0.5331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,32768,1.0962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,16384,0.8635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,2,0.5320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,32,0.5402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,4,0.5321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,64,0.5343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,16,0.5391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,128,0.5946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,512,0.7113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,4096,1.2956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,1024,0.8865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,16384,1.6144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,2048,2.0160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,256,0.6220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,2,0.1324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,8192,1.4007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,4,0.1336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,16,0.1323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,8,0.1290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,32768,2.0402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,256,0.1342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,32,0.2258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,512,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,1024,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,2048,0.2653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,128,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,8192,0.1669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,4096,0.1542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,65536,0.2531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,131072,0.3555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,32768,0.1913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,64,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,8,0.1407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,2,0.1370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,4,0.1458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,16384,0.1684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,128,0.1371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,256,0.1411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,1024,0.1490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,2048,0.2709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,64,0.1407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,8192,0.1665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,16,0.1379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,16384,0.1801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,512,0.1398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,131072,0.3629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,65536,0.2618
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,32768,0.1994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,2,0.1398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,8,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,4,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,16,0.1429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,4096,0.1594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,32,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,256,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,64,0.1389
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,1024,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,2048,0.2791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,4096,0.1650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,512,0.1485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,8192,0.1663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,128,0.1405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,32768,0.2063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,16384,0.1780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,2,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,131072,0.3714
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,4,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,16,0.1373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,8,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,64,0.1408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,128,0.1394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,32,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,512,0.1383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,256,0.1351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,65536,0.2616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,4096,0.1604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,2048,0.2801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,8192,0.1647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,1024,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,32768,0.2003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,65536,0.2639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,16384,0.1769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,2,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,4,0.1244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,131072,0.3853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,16,0.1242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,8,0.1248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,32,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,64,0.1251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,256,0.1249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,512,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,1024,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,128,0.1272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,2048,0.2633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,4096,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,32768,0.1992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,16384,0.1680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,131072,0.4004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,65536,0.2685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,8192,0.1585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,16,0.1164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,2,0.1148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,64,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,128,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,32,0.1149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,512,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,8,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,1024,0.1251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,4096,0.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,8192,0.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,2048,0.2610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,32768,0.2077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,16384,0.1729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,65536,0.2891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,256,0.1211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,131072,0.4370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,8,0.1162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,2,0.1160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,32,0.1159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,4,0.1165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,16,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,256,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,128,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,64,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,1024,0.1294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,512,0.1230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,2048,0.2708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,8192,0.1700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,32768,0.2380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,4096,0.1604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,65536,0.3394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,131072,0.5292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,4,0.1346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,2,0.1344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,16,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,32,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,8,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,64,0.1343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,128,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,512,0.1489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,16384,0.1895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,2048,0.3148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,256,0.1425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,1024,0.1610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,8192,0.2230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,4096,0.2066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,65536,0.4608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,32768,0.3208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,16384,0.2553
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,8,0.1742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,2,0.1736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,131072,0.7340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,16,0.1731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,64,0.1766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,4,0.1733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,32,0.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,128,0.1828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,256,0.1880
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,1024,0.2284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,4,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,2048,0.5146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,4096,0.3048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,8192,0.3300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,16384,0.3896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,65536,0.7363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,2,0.2482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,4,0.2490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,8,0.2487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,16,0.2490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,512,0.1999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,32,0.2495
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,64,0.2473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,131072,1.2915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,32768,0.5064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,128,0.2640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,512,0.2962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,256,0.2711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,1024,0.3464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,2048,0.9018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,16384,0.6496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,2,0.4048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,8192,0.5388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,32768,0.8811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,4096,0.4868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,8,0.4082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,16,0.4011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,65536,1.3792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,32,0.4048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,4,0.4058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,64,0.4044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,128,0.4377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,256,0.4553
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,2048,1.5789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,512,0.5002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,1024,0.5997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,16384,1.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,4096,0.8563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,2,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,4,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,16,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,32768,1.6060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,32,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,128,0.1143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,512,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,8192,0.9643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,256,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,2048,0.2404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,1024,0.1118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,64,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,4096,0.1285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,8192,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,65536,0.2275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,131072,0.3328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,4,0.1202
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,32768,0.1718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,8,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,16384,0.1543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,2,0.1163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,16,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,128,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,64,0.1212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,32,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,512,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,2048,0.2496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,4096,0.1364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,256,0.1201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,8192,0.1447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,16384,0.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,32768,0.1871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,65536,0.2376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,2,0.1187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,1024,0.1167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,4,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,16,0.1268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,32,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,8,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,64,0.1181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,128,0.1200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,256,0.1227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,512,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,2048,0.2511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,4096,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,8192,0.1433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,1024,0.1211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,32768,0.1898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,65536,0.2466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,131072,0.5046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,16384,0.1576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,2,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,4,0.1130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,16,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,32,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,64,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,8,0.1220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,128,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,512,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,256,0.1144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,4096,0.1431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,1024,0.1181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,32768,0.1874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,65536,0.2504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,16384,0.1564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,8192,0.1525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,2,0.1148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,131072,0.3705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,4,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,16,0.1139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,8,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,64,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,32,0.1206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,128,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,256,0.1160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,1024,0.1224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,512,0.1183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,4096,0.1451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,2048,0.2535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,8192,0.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,16384,0.1669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,65536,0.2680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,32768,0.1912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,2,0.1006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,131072,0.3975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,8,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,16,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,32,0.0998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,4,0.0984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,128,0.1043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,64,0.0977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,512,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,1024,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,8192,0.1436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,256,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,32768,0.1930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,65536,0.2756
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,16384,0.1578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,2048,0.2451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,2,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,4,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,8,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,131072,0.4243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,16,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,64,0.1008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,128,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,256,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,512,0.1108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,32,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,2048,0.2559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,1024,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,2048,0.2509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,8192,0.1538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,4096,0.1448
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,16384,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,32768,0.2199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,65536,0.3237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,2,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,4,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,16,0.1123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,32,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,64,0.1120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,131072,0.5118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,128,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,8,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,256,0.1209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,1024,0.1369
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,4096,0.1725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,512,0.1282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,16384,0.2199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,2048,0.2811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,65536,0.4279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,32768,0.2855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,2,0.1500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,4,0.1507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,16,0.1504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,131072,0.6989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,8192,0.1883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,32,0.1509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,128,0.1610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,8,0.1492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,512,0.1713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,64,0.1508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,256,0.1625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,4096,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,4096,0.2481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,2048,0.4591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,8192,0.2757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,1024,0.1886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,65536,0.6810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,32768,0.4506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,4,0.2159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,2,0.2192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,8,0.2181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,16384,0.3329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,128,0.2264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,256,0.2358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,16,0.2164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,512,0.2510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,1024,0.2837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,32,0.2176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,2048,0.8122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,16384,0.5655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,8192,0.4537
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,32768,0.7919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,2,0.3451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,4096,0.4013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,8,0.3470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,65536,1.3165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,64,0.2176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,4,0.3465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,16,0.3462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,32,0.3453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,64,0.3490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,128,0.3687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,256,0.3818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,512,0.4117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,4096,0.6722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,2048,1.3890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,8192,0.7816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,1024,0.4702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,4,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,16384,0.9938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,2,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,32768,1.4280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,8,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,16,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,64,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,32,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,256,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,128,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,512,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,2048,0.2349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,16384,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,8192,0.1253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,65536,0.2202
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,131072,0.3224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,2,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,4096,0.1184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,32768,0.1681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,4,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,8,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,1024,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,32,0.1084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,64,0.1158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,128,0.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,16,0.1076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,256,0.1100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,512,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,1024,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,2048,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,4096,0.1321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,32768,0.1666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,65536,0.2272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,16384,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,8192,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,4,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,2,0.1109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,8,0.1126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,131072,0.3326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,16,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,64,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,512,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,128,0.1129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,4096,0.1306
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,8192,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,1024,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,32768,0.1729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,256,0.1186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,65536,0.2348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,16384,0.1427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,2048,0.1223
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,4,0.1083
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,2,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,8,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,131072,0.3488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,32,0.1061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,16,0.1082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,128,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,64,0.1081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,512,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,2048,0.2416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,4096,0.1268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,1024,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,8192,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,32768,0.1813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,256,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,16384,0.1456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,2,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,65536,0.2408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,4,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,16,0.1081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,32,0.1079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,131072,0.3582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,8,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,256,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,128,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,1024,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,2048,0.2423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,512,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,8192,0.1408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,64,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,32768,0.1864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,65536,0.2546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,2,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,4096,0.1354
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,16384,0.1477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,131072,0.3877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,4,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,16,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,64,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,8,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,256,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,32,0.1119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,128,0.0982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,512,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,32,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,2048,0.2406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,4096,0.1281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,16384,0.1520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,65536,0.2722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,131072,0.4195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,8192,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,2,0.0934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,1024,0.1065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,4,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,32,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,64,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,8,0.0943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,32768,0.1887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,128,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,512,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,16,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,2048,0.2503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,8192,0.1480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,1024,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,256,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,16384,0.1685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,131072,0.5065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,2,0.1063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,4096,0.1379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,8,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,16,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,65536,0.3172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,64,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,32,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,32768,0.2159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,256,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,4,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,2048,0.2766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,512,0.1200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,128,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,16384,0.2142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,4096,0.1681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,65536,0.4230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,2,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,131072,0.6956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,8192,0.1828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,8,0.1363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,4,0.1365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,16,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,1024,0.1323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,32768,0.2804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,128,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,64,0.1375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,32,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,256,0.1561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,2048,0.4514
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,4096,0.2384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,8192,0.2667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,1024,0.1797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,16384,0.3238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,32768,0.4419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,512,0.1651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,8,0.1979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,65536,0.6732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,4,0.1974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,16,0.1982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,64,0.2024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,32,0.2013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,2,0.1972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,256,0.2249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,512,0.2387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,2048,0.8001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,4096,0.3893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,16384,0.5517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,1024,0.2709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,32768,0.7804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,2,0.3212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,128,0.2117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,65536,1.2857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,8192,0.4399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,4,0.3213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,8,0.3210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,16,0.3215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,32,0.3206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,128,0.3403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,64,0.3247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,512,0.3837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,1024,0.4441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,256,0.3582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,8192,0.7531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,16384,0.9689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,2,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,2048,1.3672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,32768,1.3958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,4,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,4096,0.6491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,16,0.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,32,0.0955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,8,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,512,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,64,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,1024,0.0954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,128,0.0954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,2048,0.2240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,256,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,8192,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,16384,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,65536,0.2107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,131072,0.3135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,2,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,4,0.1082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,4096,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,8,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,16,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,64,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,32,0.1047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,128,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,512,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,256,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,1024,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,4096,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,2048,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,8192,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,32768,0.1653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,16384,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,65536,0.2218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,8,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,4,0.1046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,32768,0.1495
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,131072,0.3279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,16,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,64,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,32,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,128,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,256,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,2048,0.2445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,1024,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,512,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,4096,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,16384,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,32768,0.1629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,65536,0.2323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,8192,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,2,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,4,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,131072,0.3413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,32,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,16,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,128,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,8,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,512,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,1024,0.1104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,2048,0.2365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,64,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,256,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,4096,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,8192,0.1274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,16384,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,32768,0.1722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,131072,0.3520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,4,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,2,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,16,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,32,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,64,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,65536,0.2354
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,256,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,512,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,1024,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,8,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,2048,0.2372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,8192,0.1304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,16384,0.1438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,32768,0.1842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,128,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,2,0.1062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,65536,0.2550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,4096,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,4,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,2,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,131072,0.3797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,8,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,64,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,32,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,512,0.0955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,1024,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,2048,0.2332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,4096,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,128,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,8192,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,16384,0.1480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,32768,0.1850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,65536,0.2678
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,16,0.0941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,256,0.0936
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,8,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,4,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,2,0.0923
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,64,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,128,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,131072,0.4153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,512,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,32,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,1024,0.1042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,4096,0.1364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,8192,0.1453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,2048,0.2505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,256,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,65536,0.3141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,16384,0.1655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,4,0.1027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,131072,0.5038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,32768,0.2139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,2,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,16,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,16,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,64,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,256,0.1067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,32,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,128,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,2048,0.2750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,512,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,1024,0.1300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,8,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,32768,0.2790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,65536,0.4215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,4096,0.1675
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,8192,0.1820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,2,0.1302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,8,0.1302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,4,0.1281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,16384,0.2115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,16,0.1325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,32,0.1323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,64,0.1310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,128,0.1397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,512,0.1612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,2048,0.4492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,1024,0.1775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,4096,0.2367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,16384,0.3212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,256,0.1487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,8192,0.2646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,32768,0.4388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,65536,0.6686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,2,0.1918
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,4,0.1914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,131072,1.2211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,16,0.1908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,8,0.1937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,32,0.1933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,64,0.1991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,128,0.2080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,512,0.2337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,256,0.2171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,4096,0.3817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,1024,0.2636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,16384,0.5445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,2048,0.7940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,32768,0.7749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,8192,0.4348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,2,0.3104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,8,0.3127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,4,0.3125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,65536,1.2723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,16,0.3131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,32,0.3113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,128,0.3304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,256,0.3447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,64,0.3121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,512,0.3735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,1024,0.4318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,8192,0.7428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,2048,1.3550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,16384,0.9538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,4096,0.6352
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,4,0.0899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,32,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,2,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,32768,1.3891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,256,0.0913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,64,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,128,0.0913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,512,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,16,0.0896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,2048,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,1024,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,4096,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,32768,0.1516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,8192,0.1104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,16384,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,65536,0.2081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,4,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,2,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,8,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,16,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,128,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,32,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,64,0.0998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,256,0.1024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,512,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,2048,0.1039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,1024,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,4096,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,16384,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,32768,0.1586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,8192,0.1219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,131072,0.3181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,2,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,4,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,65536,0.2138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,16,0.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,8,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,32,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,128,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,256,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,512,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,64,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,2048,0.2313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,4096,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,1024,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,8192,0.1223
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,16384,0.1330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,65536,0.2167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,8,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,131072,0.3384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,2,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,4,0.0981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,8,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,32768,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,128,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,64,0.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,512,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,16,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,32,0.0976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,1024,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,256,0.1004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,2048,0.2357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,4096,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,32768,0.1610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,65536,0.2299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,131072,0.3504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,2,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,16384,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,4,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,32,0.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,64,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,8192,0.1224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,128,0.1017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,8,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,256,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,512,0.1042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,4096,0.1198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,1024,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,2048,0.2344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,32768,0.1747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,8192,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,65536,0.2506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,16384,0.1415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,131072,0.3796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,8,0.0902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,4,0.0906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,16,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,2,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,128,0.0915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,64,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,256,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,32,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,512,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,1024,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,4096,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,2048,0.2286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,8192,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,16384,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,131072,0.4111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,65536,0.2637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,2,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,32768,0.1823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,8,0.0886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,4,0.0902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,32,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,64,0.0903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,128,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,512,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,16,0.0905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,256,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,1024,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,2048,0.2455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,16384,0.1623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,4096,0.1336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,65536,0.3121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,32768,0.2103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,131072,0.5019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,8192,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,8,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,16,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,2,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,16,0.0997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,32,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,64,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,128,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,4,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,256,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,4096,0.1645
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,8192,0.1811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,2048,0.2741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,16384,0.2130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,512,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,1024,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,4,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,131072,0.6920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,32768,0.2783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,65536,0.4200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,8,0.1284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,32,0.1290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,2,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,64,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,16,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,1024,0.1761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,256,0.1458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,4096,0.2353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,512,0.1615
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,16384,0.3194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,128,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,8192,0.2623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,2048,0.4450
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,32768,0.4377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,4,0.1850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,65536,0.6664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,16,0.1865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,8,0.1854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,32,0.1868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,64,0.1934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,131072,1.2217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,2,0.1880
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,1024,0.2607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,512,0.2311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,256,0.2142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,2048,0.7914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,128,0.2052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,4096,0.3787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,16384,0.5418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,32768,0.7707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,4,0.3063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,2,0.3074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,65536,1.2678
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,8192,0.4321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,8,0.3063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,32,0.3089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,16,0.3057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,128,0.3237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,64,0.3098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,1024,0.4257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,512,0.3709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,8192,0.7343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,2048,1.3453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,16384,0.9470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,4096,0.6291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,4,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,8,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,32768,1.3761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,16,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,2,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,32,0.0883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,256,0.3389
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,1024,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,128,0.0900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,256,0.0892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,64,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,4096,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,16384,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,32768,0.1460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,512,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,65536,0.2040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,131072,0.3070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,2048,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,8192,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,2,0.0933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,16,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,64,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,8,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,4,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,128,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,256,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,512,0.0944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,4096,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,2048,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,1024,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,8192,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,32768,0.1528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,131072,0.3143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,2,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,4,0.0960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,16384,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,32,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,16,0.0978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,64,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,128,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,65536,0.2077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,512,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,8,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,1024,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,4096,0.1123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,2048,0.2274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,8192,0.1190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,16384,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,32,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,65536,0.2158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,131072,0.3265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,32768,0.1592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,4,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,256,0.0981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,8,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,16,0.0942
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,32,0.0974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,2,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,64,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,128,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,512,0.0974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,1024,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,8192,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,2048,0.2282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,32768,0.1606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,65536,0.2291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,256,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,4096,0.1144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,2,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,4,0.0961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,131072,0.3467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,32,0.0965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,16,0.0981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,128,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,64,0.0983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,256,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,8,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,512,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,2048,0.2323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,16384,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,1024,0.1006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,16384,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,32768,0.1772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,65536,0.2459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,2,0.0887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,131072,0.3753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,4,0.0886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,8,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,4096,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,16,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,64,0.0879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,32,0.0890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,256,0.0885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,128,0.0894
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,2048,0.2274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,8192,0.1250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,4096,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,512,0.0919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,8192,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,1024,0.0937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,16384,0.1430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,65536,0.2622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,131072,0.4089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,2,0.0886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,16,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,8,0.0897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,32,0.0887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,4,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,32768,0.1796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,256,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,128,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,512,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,4096,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,8192,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,1024,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,16384,0.1621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,65536,0.3117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,32768,0.2090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,131072,0.4998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,2,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,2048,0.2435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,8,0.0979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,4,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,64,0.0890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,32,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,64,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,512,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,16,0.0987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,1024,0.1265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,128,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,8192,0.1773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,4096,0.1610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,16384,0.2070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,256,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,2048,0.2712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,131072,0.6903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,65536,0.4181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,32768,0.2749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,2,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,16,0.1268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,32,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,4,0.1241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,128,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,8,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,256,0.1435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,64,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,512,0.1584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,2048,0.4439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,8192,0.2605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,16384,0.3173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,4096,0.2302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,32768,0.4351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,1024,0.1745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,2,0.1773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,4,0.1835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,8,0.1792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,64,0.1865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,128,0.2009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,131072,1.2221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,16,0.1807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,256,0.2129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,65536,0.6640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,512,0.2262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,2048,0.7853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,32,0.1811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,4096,0.3747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,8192,0.4261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,16384,0.5360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,2,0.3023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,32768,0.7707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,4,0.3018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,1024,0.2573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,16,0.3022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,64,0.2978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,32,0.2979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,8,0.2974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,65536,1.2829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,256,0.3307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,512,0.3609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,128,0.3202
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,4096,0.6244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,1024,0.4208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,8192,0.7254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,4,0.1648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,2,0.1657
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,16384,0.9408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,32768,1.3711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,2048,1.3358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,16,0.1651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,8,0.1662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,32,0.1670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,128,0.1686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,64,0.1647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,256,0.1707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,1024,0.1758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,512,0.1742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,4096,0.1937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,8192,0.1988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,32768,0.2300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,131072,0.3951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,16384,0.2069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,2,0.1715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,2048,0.1809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,65536,0.2916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,32,0.1751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,16,0.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,128,0.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,64,0.1725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,4,0.1748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,8,0.1736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,1024,0.1841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,2048,0.1997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,512,0.1833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,8192,0.2073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,256,0.1790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,4096,0.2057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,16384,0.2151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,131072,0.3974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,4,0.1745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,2,0.1769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,8,0.1759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,16,0.1747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,32,0.1754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,32768,0.2387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,256,0.1831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,512,0.1870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,1024,0.1896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,128,0.1790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,65536,0.3011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,2048,0.1972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,64,0.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,8192,0.2069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,4096,0.2006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,65536,0.3000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,131072,0.4094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,4,0.1760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,8,0.1770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,16,0.1764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,2,0.1774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,32,0.1756
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,128,0.1829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,32768,0.2402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,16384,0.2159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,512,0.1904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,2048,0.2026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,4096,0.2050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,256,0.1896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,8192,0.2122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,16384,0.2232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,131072,0.4261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,1024,0.1960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,64,0.1771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,4,0.1808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,32768,0.2483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,8,0.1807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,2,0.1815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,16,0.1788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,65536,0.3099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,64,0.1805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,512,0.1922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,256,0.1863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,1024,0.1919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,2048,0.2064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,8192,0.2175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,32,0.1807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,4096,0.2096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,32768,0.2581
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,16384,0.2277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,131072,0.4555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,4,0.1862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,8,0.1849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,128,0.1847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,2,0.1875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,65536,0.3291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,64,0.1850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,128,0.1871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,256,0.1903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,512,0.1933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,32,0.1848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,1024,0.1993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,16,0.1858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,8192,0.2312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,2048,0.2188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,4096,0.2244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,16384,0.2441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,2,0.1906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,32768,0.2807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,131072,0.5134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,4,0.1898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,8,0.1904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,32,0.1909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,64,0.1905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,65536,0.3625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,256,0.1971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,128,0.1948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,2048,0.2404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,4096,0.2463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,512,0.2014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,16384,0.2774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,8192,0.2548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,16,0.1908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,131072,0.6142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,1024,0.2101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,4,0.2061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,8,0.2055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,32768,0.3237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,16,0.2056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,32,0.2054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,65536,0.4249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,128,0.2116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,2,0.2050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,256,0.2159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,64,0.2041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,2048,0.2933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,8192,0.3193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,4096,0.3016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,32768,0.4160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,65536,0.5574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,1024,0.2469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,16384,0.3483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,2,0.2403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,4,0.2402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,131072,0.8290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,512,0.2276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,32,0.2402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,128,0.2525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,64,0.2404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,256,0.2606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,1024,0.3200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,2048,0.3990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,512,0.2809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,16,0.2401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,8192,0.4414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,16384,0.4992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,8,0.2395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,32768,0.6178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,4096,0.4139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,65536,0.8505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,4,0.3151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,2,0.3149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,16,0.3165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,8,0.3150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,131072,1.4057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,64,0.3166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,32,0.3161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,512,0.3862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,256,0.3510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,2048,0.6037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,4096,0.6324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,128,0.3354
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,16384,0.7983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,8192,0.6844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,1024,0.4615
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,32768,1.0266
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,2,0.5310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,8,0.5335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,4,0.5313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,32,0.5332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,16,0.5327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,256,0.5916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,128,0.5612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,512,0.6623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,2048,1.0826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,64,0.5308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,8192,1.2361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,16384,1.4495
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,4,0.1270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,1024,0.8087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,65536,1.5219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,4096,1.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,32768,1.8819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,2,0.1280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,8,0.1270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,16,0.1273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,64,0.1285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,128,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,512,0.1328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,2048,0.1397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,32,0.1264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,1024,0.1346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,8192,0.1566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,32768,0.1912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,256,0.1325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,16384,0.1679
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,2,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,65536,0.2486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,4096,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,131072,0.3574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,16,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,4,0.1342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,64,0.1328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,256,0.1364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,128,0.1386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,32,0.1362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,8,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,4096,0.1605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,512,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,16384,0.1746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,32768,0.2013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,8192,0.1680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,65536,0.2608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,2048,0.1617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,4,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,8,0.1373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,16,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,32,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,1024,0.1436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,128,0.1376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,2,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,512,0.1439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,1024,0.1444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,64,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,4096,0.1651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,8192,0.1732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,16384,0.1821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,131072,0.3599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,32768,0.2076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,65536,0.2694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,256,0.1404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,131072,0.3700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,2048,0.1665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,16,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,8,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,32,0.1384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,128,0.1403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,256,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,64,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,1024,0.1502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,4,0.1387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,2,0.1379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,8192,0.1777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,2048,0.1688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,16384,0.1868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,65536,0.2783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,32768,0.2122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,2,0.1414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,4,0.1412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,8,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,131072,0.3951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,512,0.1488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,32,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,16,0.1415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,256,0.1520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,4096,0.1689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,64,0.1417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,1024,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,4096,0.1831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,2048,0.1792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,8192,0.1895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,16384,0.2006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,65536,0.3036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,512,0.1548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,131072,0.4319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,32768,0.2318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,8,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,128,0.1469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,32,0.1437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,16,0.1428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,128,0.1494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,256,0.1497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,64,0.1449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,1024,0.1669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,2,0.1437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,2048,0.2025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,4096,0.2074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,8192,0.2113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,4,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,512,0.1550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,131072,0.4963
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,32768,0.2629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,2,0.1509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,16384,0.2266
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,65536,0.3465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,16,0.1509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,8,0.1509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,64,0.1508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,32,0.1523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,128,0.1604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,512,0.1776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,1024,0.2053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,4096,0.2702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,8192,0.2803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,256,0.1632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,2048,0.2631
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,32768,0.3476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,4,0.1510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,131072,0.6379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,4,0.1658
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,65536,0.4494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,16384,0.3013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,8,0.1663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,16,0.1656
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,64,0.1663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,128,0.1801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,32,0.1662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,256,0.1901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,1024,0.2630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,4096,0.3681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,2048,0.3669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,8192,0.3921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,32768,0.4887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,16384,0.4211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,2,0.1669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,131072,0.8968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,4,0.1995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,65536,0.6241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,8,0.1988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,16,0.1979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,512,0.2145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,32,0.2008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,64,0.1983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,128,0.2220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,2,0.1992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,2048,0.5572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,1024,0.3809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,512,0.2878
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,16384,0.6567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,8192,0.6014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,4096,0.5730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,32768,0.7759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,2,0.2708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,4,0.2712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,131072,1.5634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,8,0.2724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,65536,1.0048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,16,0.2721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,256,0.2425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,64,0.2706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,128,0.3143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,32,0.2712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,256,0.3593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,1024,0.6296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,512,0.4485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,4096,0.9945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,16384,1.1742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,2,0.4417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,32768,1.4027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,8,0.4419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,65536,1.9105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,2048,0.9860
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,4,0.4423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,8192,1.0630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,16,0.4441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,128,0.5261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,256,0.6182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,32,0.4403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,512,0.7985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,64,0.4448
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,2048,1.8313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,8192,1.9614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,2,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,16384,2.2021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,1024,1.1453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,4,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,32768,2.6084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,4096,1.8824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,8,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,16,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,64,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,128,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,512,0.1057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,32,0.1005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,4096,0.1263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,2048,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,256,0.1041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,32768,0.1671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,1024,0.1080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,65536,0.2179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,16384,0.1345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,2,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,131072,0.3275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,4,0.1084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,16,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,32,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,8,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,256,0.1117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,512,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,8192,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,128,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,2048,0.1272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,8192,0.1404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,64,0.1090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,1024,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,16384,0.1450
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,65536,0.2368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,2,0.1102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,4,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,8,0.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,4096,0.1350
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,32,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,131072,0.3387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,64,0.1104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,128,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,32768,0.1722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,256,0.1099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,1024,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,16,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,512,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,16384,0.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,32768,0.1797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,2048,0.1280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,8192,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,4096,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,2,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,131072,0.3519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,65536,0.2461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,32,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,64,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,4,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,16,0.1106
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,512,0.1160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,128,0.1142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,1024,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,4096,0.1452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,8192,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,256,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,2048,0.1466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,16384,0.1605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,131072,0.3705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,8,0.1098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,2,0.1124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,32768,0.1918
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,16,0.1171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,32,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,65536,0.2562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,128,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,4,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,512,0.1239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,64,0.1118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,2048,0.1493
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,8,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,4096,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,1024,0.1336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,32768,0.2007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,8192,0.1605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,65536,0.2739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,131072,0.4031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,16384,0.1746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,8,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,256,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,32,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,2,0.1141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,64,0.1161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,16,0.1159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,512,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,1024,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,256,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,2048,0.1649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,4096,0.1701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,16384,0.1910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,32768,0.2272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,4,0.1191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,131072,0.4576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,65536,0.3076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,4,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,2,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,8192,0.1771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,32,0.1206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,128,0.1213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,64,0.1212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,128,0.1262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,512,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,16,0.1231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,1024,0.1553
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,2048,0.1928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,4096,0.1960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,8,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,256,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,8192,0.2067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,16384,0.2262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,32768,0.2762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,131072,0.5649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,4,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,8,0.1349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,16,0.1336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,64,0.1377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,2,0.1349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,65536,0.3742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,32,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,1024,0.1972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,128,0.1460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,2048,0.2602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,256,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,8192,0.2849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,16384,0.3139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,65536,0.5224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,32768,0.3790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,512,0.1670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,4096,0.2654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,4,0.1609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,16,0.1613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,32,0.1637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,8,0.1605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,64,0.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,2,0.1606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,128,0.1755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,131072,0.7985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,256,0.1888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,1024,0.2686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,8192,0.4154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,4096,0.3837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,16384,0.4737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,2048,0.3752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,2,0.2054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,65536,0.8221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,4,0.2056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,16,0.2060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,131072,1.3803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,32768,0.5941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,512,0.2149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,32,0.2058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,8,0.2055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,128,0.2299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,256,0.2544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,64,0.2053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,4096,0.6332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,512,0.3097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,2048,0.6048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,1024,0.4065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,2,0.3165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,32768,1.0267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,16384,0.7981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,8192,0.6871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,16,0.3181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,8,0.3160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,4,0.3163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,32,0.3168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,65536,1.5471
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,64,0.3162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,128,0.3640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,512,0.5204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,256,0.4129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,4096,1.1411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,8192,1.2400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,1024,0.6980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,2048,1.0958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,8,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,16384,1.4583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,2,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,32768,1.8927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,16,0.0953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,64,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,32,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,128,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,512,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,1024,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,8192,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,2048,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,4,0.0912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,256,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,4096,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,65536,0.2106
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,2,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,32768,0.1518
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,4,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,16,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,32,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,64,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,128,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,512,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,1024,0.1039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,2048,0.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,256,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,4096,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,131072,0.3160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,16384,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,8,0.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,65536,0.2179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,16384,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,131072,0.3251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,32768,0.1572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,2,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,8192,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,16,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,4,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,64,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,256,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,128,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,8,0.1023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,32,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,1024,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,4096,0.1199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,8192,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,2048,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,32768,0.1625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,65536,0.2262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,131072,0.3377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,16384,0.1354
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,4,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,16,0.1022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,8,0.1018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,512,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,128,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,64,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,256,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,2,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,512,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,1024,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,2048,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,4096,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,8192,0.1285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,65536,0.2424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,32768,0.1723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,32,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,4,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,131072,0.3621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,16,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,8,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,64,0.1039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,128,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,32,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,256,0.1057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,512,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,16384,0.1444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,2048,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,8192,0.1448
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,16384,0.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,2,0.1039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,32768,0.1909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,1024,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,131072,0.3919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,2,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,65536,0.2632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,16,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,4096,0.1332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,64,0.1056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,128,0.1086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,8,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,4,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,512,0.1122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,1024,0.1200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,32,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,4096,0.1523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,16384,0.1735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,8192,0.1584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,65536,0.2915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,32768,0.2091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,2,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,4,0.1088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,131072,0.4409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,2048,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,32,0.1076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,8,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,64,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,256,0.1102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,128,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,1024,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,16,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,256,0.1165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,512,0.1219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,16384,0.2036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,8192,0.1839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,32768,0.2496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,2048,0.1690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,131072,0.5417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,4,0.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,8,0.1165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,2,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,16,0.1148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,4096,0.1752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,64,0.1168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,32,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,65536,0.3516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,1024,0.1592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,256,0.1296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,2048,0.2053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,4096,0.2130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,16384,0.2565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,32768,0.3248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,512,0.1431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,65536,0.4691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,4,0.1398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,131072,0.7388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,8192,0.2273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,8,0.1366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,16,0.1385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,32,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,2,0.1367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,64,0.1409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,256,0.1607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,512,0.1807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,128,0.1198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,4096,0.2996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,2048,0.2861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,1024,0.2144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,128,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,8192,0.3292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,65536,0.7316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,2,0.1771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,4,0.1747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,16384,0.3836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,32768,0.5032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,32,0.1769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,128,0.1941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,16,0.1758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,131072,1.2878
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,256,0.2111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,8,0.1759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,1024,0.3097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,64,0.1767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,2048,0.4499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,8192,0.5315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,16384,0.6421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,512,0.2446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,2,0.2531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,8,0.2526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,65536,1.3969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,16,0.2527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,32768,0.8745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,4096,0.4799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,32,0.2527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,64,0.2562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,4,0.2525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,512,0.3744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,1024,0.4940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,4096,0.8065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,256,0.3160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,16384,1.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,2,0.0886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,32768,1.5528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,8192,0.9122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,2048,0.7467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,8,0.0887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,128,0.2841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,16,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,32,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,4,0.0895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,64,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,128,0.0887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,2048,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,1024,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,256,0.0901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,16384,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,8192,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,32768,0.1444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,4096,0.1056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,2,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,4,0.0961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,8,0.0960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,512,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,131072,0.3098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,64,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,128,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,32,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,65536,0.2011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,512,0.0976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,16,0.0934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,256,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,16384,0.1272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,8192,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,1024,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,32768,0.1561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,65536,0.2098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,2048,0.1122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,4,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,131072,0.3155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,8,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,4096,0.1142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,64,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,32,0.0958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,2,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,512,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,1024,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,256,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,128,0.0977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,16,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,8192,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,2048,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,16384,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,2,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,65536,0.2198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,131072,0.3320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,4096,0.1150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,16,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,4,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,32768,0.1586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,64,0.0979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,128,0.0985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,8,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,256,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,512,0.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,4096,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,2048,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,8192,0.1244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,16384,0.1347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,131072,0.3552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,65536,0.2353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,32,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,2,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,8,0.1006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,1024,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,16,0.1002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,32768,0.1610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,64,0.0998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,4,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,32,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,1024,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,128,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,4096,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,512,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,2048,0.1187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,256,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,32768,0.1867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,8192,0.1335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,2,0.1020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,65536,0.2575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,8,0.1027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,131072,0.3870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,32,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,64,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,128,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,16384,0.1505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,256,0.1044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,4,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,2048,0.1367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,4096,0.1434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,16,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,8192,0.1521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,32768,0.2040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,16384,0.1696
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,131072,0.4357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,65536,0.2877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,4,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,1024,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,512,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,32,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,128,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,64,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,16,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,512,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,1024,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,2048,0.1615
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,2,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,256,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,8192,0.1798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,16384,0.1987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,8,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,4096,0.1688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,131072,0.5362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,65536,0.3456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,16,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,8,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,4,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,32,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,64,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,2,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,32768,0.2446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,512,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,256,0.1190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,1024,0.1542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,8192,0.2216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,16384,0.2520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,4096,0.2059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,128,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,65536,0.4622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,2048,0.1996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,4,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,32768,0.3203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,2,0.1287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,131072,0.7355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,32,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,64,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,16,0.1289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,256,0.1543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,128,0.1407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,8,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,512,0.1712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,1024,0.2036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,8192,0.3182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,32768,0.4924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,2048,0.2786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,4096,0.2909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,16384,0.3754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,65536,0.7238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,131072,1.2785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,16,0.1669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,4,0.1646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,8,0.1639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,32,0.1671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,2,0.1631
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,128,0.1848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,64,0.1670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,2048,0.4370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,8192,0.5186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,512,0.2356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,16384,0.6324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,256,0.2016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,4096,0.4686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,1024,0.2944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,2,0.2357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,65536,1.3726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,32,0.2364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,32768,0.8615
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,64,0.2399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,4,0.2362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,128,0.2653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,8,0.2361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,256,0.2938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,512,0.3552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,2048,0.7271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,1024,0.4679
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,8192,0.8862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,4096,0.7805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,16384,1.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,16,0.2364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,8,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,2,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,32,0.0848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,64,0.0868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,32768,1.5301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,256,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,16,0.0848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,1024,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,128,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,4,0.0867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,8192,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,4096,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,32768,0.1455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,65536,0.2026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,2048,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,2,0.0953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,16384,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,131072,0.3020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,8,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,16,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,64,0.0953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,4,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,512,0.0885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,512,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,256,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,1024,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,4096,0.1125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,2048,0.1042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,32,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,32768,0.1519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,16384,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,8192,0.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,2,0.0953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,8,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,128,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,16,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,4,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,32,0.0946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,128,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,65536,0.2117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,131072,0.3128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,64,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,1024,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,256,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,8192,0.1190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,512,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,4096,0.1129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,2048,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,32768,0.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,16384,0.1284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,2,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,4,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,131072,0.3248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,65536,0.2161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,16,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,64,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,256,0.0984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,512,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,1024,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,2048,0.1122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,128,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,8192,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,16384,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,65536,0.2329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,32,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,32768,0.1605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,2,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,131072,0.3529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,8,0.0953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,4,0.0958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,16,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,8,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,64,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,128,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,4096,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,2048,0.1192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,256,0.0986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,4096,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,32,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,32768,0.1837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,1024,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,512,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,131072,0.3850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,8192,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,2,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,8,0.1002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,16,0.1006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,4,0.1001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,16384,0.1459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,64,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,256,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,512,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,1024,0.1080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,32,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,2048,0.1329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,8192,0.1517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,16384,0.1668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,128,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,65536,0.2532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,65536,0.2852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,4096,0.1394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,4,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,8,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,131072,0.4318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,32,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,64,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,128,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,16,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,32768,0.2016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,256,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,2048,0.1605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,512,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,1024,0.1281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,8192,0.1750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,2,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,32768,0.2421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,131072,0.5321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,2,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,4,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,4096,0.1655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,16,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,16384,0.1950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,128,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,32,0.1081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,65536,0.3442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,256,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,1024,0.1506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,512,0.1328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,2048,0.1975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,64,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,8,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,4096,0.2033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,32768,0.3159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,65536,0.4591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,16384,0.2502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,4,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,2,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,131072,0.7321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,8,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,32,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,128,0.1346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,16,0.1263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,8192,0.2193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,64,0.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,2048,0.2732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,1024,0.2022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,4096,0.2882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,8192,0.3149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,32768,0.4921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,16384,0.3713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,2,0.1591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,65536,0.7220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,512,0.1683
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,256,0.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,8,0.1624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,4,0.1603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,32,0.1606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,64,0.1637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,16,0.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,1024,0.2898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,256,0.1988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,131072,1.2743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,4096,0.4659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,2048,0.4324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,512,0.2328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,128,0.1817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,16384,0.6252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,4,0.2281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,2,0.2285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,65536,1.3755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,32768,0.8579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,16,0.2290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,8192,0.5178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,32,0.2284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,64,0.2310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,8,0.2304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,256,0.2877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,1024,0.4604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,8192,0.8737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,512,0.3453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,16384,1.0896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,2048,0.7160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,2,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,32768,1.5249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,4,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,8,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,4096,0.7731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,16,0.0845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,32,0.0829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,128,0.2603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,256,0.0870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,1024,0.0886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,2048,0.0919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,512,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,8192,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,16384,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,128,0.0890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,65536,0.1984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,32768,0.1431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,131072,0.3023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,4,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,64,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,4096,0.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,2,0.0923
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,16,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,128,0.0936
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,256,0.0959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,32,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,64,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,2048,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,512,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,8,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,16384,0.1241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,32768,0.1505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,65536,0.2077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,131072,0.3081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,8192,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,4,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,8,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,16,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,4096,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,1024,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,2,0.0937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,32,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,512,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,64,0.0961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,256,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,4096,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,8192,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,128,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,32768,0.1544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,65536,0.2138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,131072,0.3274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,16384,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,1024,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,2,0.0935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,2048,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,32,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,64,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,128,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,512,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,4,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,8,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,2048,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,1024,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,256,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,16384,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,8192,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,65536,0.2253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,4096,0.1128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,32768,0.1578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,4,0.0965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,16,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,2,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,32,0.0957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,8,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,131072,0.3540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,512,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,64,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,16,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,2048,0.1139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,128,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,256,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,16384,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,32768,0.1774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,65536,0.2517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,4096,0.1211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,8192,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,4,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,16,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,1024,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,64,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,32,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,2,0.0977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,128,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,256,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,131072,0.3822
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,8,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,8192,0.1465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,4096,0.1370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,2048,0.1313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,32768,0.2016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,16384,0.1647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,512,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,131072,0.4301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,1024,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,4,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,2,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,65536,0.2855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,8,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,128,0.1026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,64,0.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,16,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,512,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,256,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,32,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,4096,0.1628
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,8192,0.1726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,1024,0.1242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,32768,0.2391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,2048,0.1578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,2,0.1069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,4,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,16384,0.1937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,16,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,64,0.1068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,128,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,8,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,131072,0.5317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,512,0.1286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,2048,0.1950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,65536,0.3427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,4096,0.2041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,8192,0.2177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,1024,0.1504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,32,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,32768,0.3152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,256,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,16384,0.2491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,65536,0.4574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,4,0.1253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,16,0.1244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,8,0.1251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,2,0.1242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,128,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,32,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,512,0.1664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,131072,0.7311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,256,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,64,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,4096,0.2854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,1024,0.1971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,2048,0.2706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,8192,0.3144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,16384,0.3707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,4,0.1585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,2,0.1568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,32768,0.4892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,16,0.1586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,65536,0.7190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,8,0.1564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,64,0.1626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,512,0.2292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,131072,1.2742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,32,0.1587
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,128,0.1787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,2048,0.4337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,4096,0.4621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,256,0.1976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,8192,0.5145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,16384,0.6250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,4,0.2264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,32768,0.8526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,65536,1.3530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,1024,0.2872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,16,0.2260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,8,0.2258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,32,0.2260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,64,0.2292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,512,0.3404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,2,0.2277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,128,0.2548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,1024,0.4543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,8192,0.8750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,2048,0.7126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,256,0.2838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,2,0.0850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,32768,1.5219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,16,0.0846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,16384,1.0815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,4,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,8,0.0827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,4096,0.7678
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,32,0.0829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,64,0.0848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,2048,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,128,0.0850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,256,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,1024,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,8192,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,16384,0.1165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,65536,0.1975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,512,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,131072,0.3036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,2,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,4096,0.1003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,16,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,32,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,64,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,8,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,32768,0.1375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,256,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,512,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,4,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,1024,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,8192,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,4096,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,2048,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,16384,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,131072,0.3143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,2,0.0912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,8,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,65536,0.2073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,32768,0.1492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,4,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,64,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,16,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,128,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,256,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,1024,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,32,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,2048,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,512,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,128,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,32768,0.1533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,16384,0.1250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,131072,0.3198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,4096,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,65536,0.2139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,2,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,8,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,4,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,16,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,32,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,128,0.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,8192,0.1168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,1024,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,64,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,2048,0.1084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,4096,0.1110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,16384,0.1291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,256,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,512,0.0954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,131072,0.3492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,2,0.0957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,32768,0.1577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,8,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,65536,0.2302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,8192,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,64,0.0933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,32,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,16,0.0959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,4,0.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,512,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,128,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,4096,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,8192,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,256,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,16384,0.1399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,1024,0.1021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,2048,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,131072,0.3791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,4,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,65536,0.2527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,16,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,32,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,2,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,128,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,64,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,256,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,1024,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,512,0.1042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,2048,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,8192,0.1464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,16384,0.1626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,32768,0.1806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,4096,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,8,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,32768,0.1994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,131072,0.4300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,65536,0.2830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,2,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,32,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,16,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,128,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,256,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,8,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,1024,0.1244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,2048,0.1589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,4096,0.1611
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,4,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,16384,0.1925
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,64,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,65536,0.3427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,8192,0.1732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,2,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,512,0.1086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,131072,0.5301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,32768,0.2411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,16,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,8,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,128,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,64,0.1057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,256,0.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,32,0.1065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,512,0.1288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,1024,0.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,2048,0.1929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,8192,0.2168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,4096,0.2001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,32768,0.3158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,16384,0.2477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,65536,0.4584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,8,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,131072,0.7259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,4,0.1240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,16,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,2,0.1253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,4,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,128,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,32,0.1239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,512,0.1663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,2048,0.2716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,256,0.1469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,1024,0.1994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,64,0.1242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,16384,0.3698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,8192,0.3099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,4096,0.2845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,32768,0.4880
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,4,0.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,65536,0.7187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,16,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,8,0.1576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,32,0.1582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,64,0.1606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,2,0.1547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,256,0.1953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,2048,0.4302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,512,0.2287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,131072,1.2733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,4096,0.4616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,128,0.1792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,8192,0.5109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,1024,0.2844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,16384,0.6220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,8,0.2255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,32768,0.8549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,2,0.2239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,65536,1.3810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,16,0.2239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,32,0.2242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,4,0.2252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,64,0.2278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,128,0.2546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,512,0.3401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,2048,0.7111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,4096,0.7654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,1024,0.4516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,256,0.2824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,2,0.1673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,16384,1.0829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,8192,0.8697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,8,0.1668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,4,0.1653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,16,0.1681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,32,0.1643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,128,0.1664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,64,0.1659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,256,0.1693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,512,0.1735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,1024,0.1739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,8192,0.1919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,2048,0.1790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,65536,0.2910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,16384,0.2033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,32768,1.5131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,32768,0.2292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,4096,0.1858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,131072,0.3899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,2,0.1736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,16,0.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,4,0.1738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,64,0.1747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,128,0.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,256,0.1742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,8,0.1738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,1024,0.1783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,4096,0.1992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,32,0.1743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,512,0.1770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,32768,0.2379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,8192,0.1991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,131072,0.4012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,2048,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,2,0.1752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,4,0.1768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,16,0.1736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,65536,0.2953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,32,0.1754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,64,0.1748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,128,0.1766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,8,0.1743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,256,0.1753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,512,0.1786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,4096,0.2014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,8192,0.2031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,1024,0.1792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,32768,0.2372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,16384,0.2141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,131072,0.4068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,65536,0.3002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,2,0.1768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,8,0.1762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,4,0.1755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,16,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,16384,0.2107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,128,0.1775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,64,0.1761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,32,0.1755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,512,0.1800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,2048,0.1972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,4096,0.2002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,2048,0.1972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,1024,0.1838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,256,0.1787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,65536,0.3055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,2,0.1782
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,16384,0.2174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,8,0.1762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,8192,0.2068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,16,0.1771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,32768,0.2422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,128,0.1801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,131072,0.4222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,4,0.1789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,512,0.1828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,32,0.1785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,2048,0.1984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,256,0.1815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,4096,0.2036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,1024,0.1877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,16384,0.2212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,64,0.1769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,65536,0.3185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,8192,0.2100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,131072,0.4533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,32768,0.2515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,16,0.1811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,8,0.1798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,4,0.1808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,64,0.1816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,32,0.1811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,512,0.1899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,2,0.1795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,128,0.1877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,256,0.1879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,8192,0.2193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,16384,0.2342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,2048,0.2074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,65536,0.3533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,131072,0.5010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,2,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,32768,0.2688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,8,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,4,0.1874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,32,0.1881
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,1024,0.1909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,16,0.1895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,128,0.1888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,256,0.1908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,4096,0.2117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,64,0.1873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,2048,0.2196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,4096,0.2240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,16384,0.2543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,1024,0.1985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,512,0.1929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,32768,0.3015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,65536,0.4035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,2,0.2057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,8192,0.2366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,8,0.2054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,4,0.2053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,32,0.2044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,16,0.2055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,128,0.2076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,512,0.2166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,256,0.2133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,64,0.2044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,131072,0.5945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,1024,0.2237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,8192,0.2800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,2048,0.2541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,16384,0.3103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,65536,0.5200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,32768,0.3782
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,131072,0.7916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,4,0.2418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,2,0.2427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,8,0.2417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,4096,0.2633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,16,0.2422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,128,0.2488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,512,0.2608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,64,0.2427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,256,0.2515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,2048,0.3316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,32,0.2424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,8192,0.3763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,32768,0.5507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,4096,0.3453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,1024,0.2796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,65536,0.7805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,16384,0.4315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,4,0.3300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,8,0.3268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,16,0.3292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,2,0.3285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,64,0.3316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,131072,1.3346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,512,0.3628
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,32,0.3290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,128,0.3360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,2048,0.4898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,1024,0.3972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,16384,0.6800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,8192,0.5700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,32768,0.9112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,256,0.3473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,2,0.5708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,8,0.5693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,65536,1.4123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,4,0.5690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,16,0.5701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,32,0.5680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,128,0.5800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,64,0.5704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,256,0.5980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,512,0.6318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,4096,0.5195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,8192,1.0155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,4096,0.9116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,1024,0.6986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,2,0.1295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,16384,1.2320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,2048,0.8629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,8,0.1266
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,32,0.1294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,32768,1.6643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,4,0.1282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,128,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,64,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,16,0.1266
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,2048,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,1024,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,4096,0.1484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,512,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,32768,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,256,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,8192,0.1534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,65536,0.2482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,2,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,131072,0.3539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,16,0.1330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,32,0.1355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,64,0.1351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,128,0.1364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,8,0.1326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,256,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,16384,0.1653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,1024,0.1403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,2048,0.1566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,4,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,8192,0.1591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,512,0.1342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,32768,0.1965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,16384,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,2,0.1379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,65536,0.2556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,8,0.1343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,131072,0.3582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,32,0.1376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,4,0.1336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,64,0.1371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,4096,0.1578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,256,0.1372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,128,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,512,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,16,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,1024,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,8192,0.1637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,32768,0.1992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,65536,0.2606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,131072,0.3672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,2,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,4,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,16384,0.1726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,8,0.1384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,2048,0.1550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,64,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,16,0.1367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,32,0.1376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,256,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,512,0.1429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,1024,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,4096,0.1599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,2048,0.1606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,8192,0.1667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,32768,0.2034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,65536,0.2654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,131072,0.3802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,2,0.1399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,16384,0.1770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,4,0.1387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,8,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,16,0.1379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,128,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,32,0.1394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,256,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,128,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,64,0.1398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,2048,0.1640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,1024,0.1461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,8192,0.1743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,4096,0.1619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,16384,0.1865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,65536,0.2852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,131072,0.4146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,2,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,4096,0.1688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,32768,0.2164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,4,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,32,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,512,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,8,0.1398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,16,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,512,0.1438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,256,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,1024,0.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,2048,0.1790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,128,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,64,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,8192,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,4096,0.1802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,16384,0.2013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,32768,0.2393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,8,0.1476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,4,0.1480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,131072,0.4727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,65536,0.3226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,16,0.1466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,64,0.1468
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,2,0.1467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,256,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,128,0.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,32,0.1459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,2048,0.2097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,4096,0.2135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,16384,0.2450
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,8192,0.2228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,32768,0.2901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,512,0.1620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,65536,0.3936
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,2,0.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,131072,0.5833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,4,0.1596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,8,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,64,0.1584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,1024,0.1715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,16,0.1596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,128,0.1680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,32,0.1611
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,512,0.1854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,1024,0.2100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,8192,0.2932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,2048,0.2689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,256,0.1750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,32768,0.3935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,4096,0.2766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,131072,0.8031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,65536,0.5336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,2,0.1891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,4,0.1888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,16,0.1902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,32,0.1893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,64,0.1891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,16384,0.3242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,128,0.2057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,256,0.2142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,8,0.1890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,1024,0.2783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,8192,0.4165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,512,0.2336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,2048,0.3726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,16384,0.4729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,32768,0.5899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,4096,0.3860
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,65536,0.8229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,2,0.2500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,16,0.2517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,4,0.2510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,32,0.2506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,131072,1.3839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,64,0.2510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,256,0.2995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,512,0.3425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,128,0.2813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,1024,0.4301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,4096,0.6492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,8192,0.7009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,2048,0.6224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,16384,0.8120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,2,0.4107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,32768,1.0431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,4,0.4082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,8,0.2504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,8,0.4126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,65536,1.5610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,32,0.4093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,128,0.4749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,256,0.5007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,512,0.5869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,64,0.4103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,1024,0.7666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,8192,1.2747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,2048,1.1210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,2,0.0998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,16384,1.4864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,4096,1.1726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,4,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,16,0.4114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,8,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,32,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,128,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,64,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,256,0.1043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,32768,1.9196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,16,0.1044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,1024,0.1047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,512,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,2048,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,8192,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,16384,0.1367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,131072,0.3179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,4096,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,65536,0.2177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,32768,0.1635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,4,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,8,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,128,0.1107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,64,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,16,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,32,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,256,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,1024,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,512,0.1090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,4096,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,8192,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,2048,0.1242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,32768,0.1736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,65536,0.2325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,131072,0.3332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,4,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,16384,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,16,0.1127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,32,0.1087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,2,0.1103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,64,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,128,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,256,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,1024,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,512,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,8,0.1105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,2048,0.1269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,16384,0.1435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,8192,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,4096,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,32768,0.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,65536,0.2411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,2,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,8,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,4,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,16,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,131072,0.3476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,128,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,256,0.1125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,64,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,512,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,32,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,2048,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,1024,0.1148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,16384,0.1544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,4096,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,65536,0.2472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,8192,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,32768,0.1832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,131072,0.3645
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,8,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,4,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,32,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,64,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,2,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,128,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,16,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,512,0.1212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,256,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,1024,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,2048,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,32768,0.1919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,131072,0.3915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,4096,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,2,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,65536,0.2643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,8192,0.1460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,8,0.1169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,32,0.1139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,4,0.1143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,16,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,256,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,128,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,1024,0.1272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,64,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,512,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,2048,0.1481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,32768,0.2101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,16384,0.1753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,65536,0.2930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,8192,0.1603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,2,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,4096,0.1533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,4,0.1189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,131072,0.4424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,32,0.1207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,64,0.1191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,16,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,256,0.1240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,8,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,1024,0.1387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,128,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,4096,0.1690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,8192,0.1791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,16384,0.1994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,512,0.1313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,32768,0.2469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,65536,0.3463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,2048,0.1641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,2,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,8,0.1300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,4,0.1305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,32,0.1315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,128,0.1355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,16,0.1308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,131072,0.5381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,256,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,64,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,2048,0.1995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,512,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,1024,0.1622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,8192,0.2254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,16384,0.1607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,16384,0.2547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,4096,0.2074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,32768,0.3225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,65536,0.4627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,2,0.1525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,131072,0.7343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,8,0.1542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,4,0.1544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,16,0.1543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,128,0.1641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,32,0.1538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,512,0.1822
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,1024,0.2076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,2048,0.2730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,256,0.1687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,8192,0.3150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,64,0.1545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,16384,0.3724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,65536,0.7187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,32768,0.4902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,4,0.1951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,2,0.1952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,8,0.1955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,131072,1.2784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,4096,0.2858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,16,0.1951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,64,0.1951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,128,0.2120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,512,0.2450
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,1024,0.2938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,2048,0.4113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,32,0.1943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,8192,0.4889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,256,0.2201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,32768,0.8297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,16384,0.6017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,2,0.2970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,4,0.2952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,65536,1.3316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,4096,0.4370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,8,0.2973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,16,0.2959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,128,0.3305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,64,0.2960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,32,0.2967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,1024,0.4913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,2048,0.7065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,8192,0.8607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,256,0.3498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,2,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,16384,1.0755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,512,0.3979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,4096,0.7542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,8,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,32768,1.5029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,32,0.0917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,16,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,64,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,4,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,128,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,512,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,1024,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,256,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,2048,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,4096,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,8192,0.1139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,32768,0.1520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,2,0.1020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,4,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,131072,0.3083
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,16,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,16384,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,8,0.1023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,32,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,65536,0.2121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,128,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,512,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,256,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,4096,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,1024,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,8192,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,64,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,32768,0.1593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,65536,0.2153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,131072,0.3246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,2,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,8,0.1025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,4,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,16,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,2048,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,64,0.1028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,256,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,16384,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,128,0.1026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,32,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,1024,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,4096,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,16384,0.1336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,2048,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,512,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,8192,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,65536,0.2211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,4,0.1022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,2,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,16,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,131072,0.3379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,32,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,8,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,32768,0.1606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,256,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,128,0.1041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,2048,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,64,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,512,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,16384,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,65536,0.2376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,131072,0.3541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,1024,0.1058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,2,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,8192,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,32768,0.1687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,4096,0.1201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,8,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,32,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,4,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,256,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,16,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,512,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,1024,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,128,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,4096,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,2048,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,32768,0.1827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,64,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,8192,0.1312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,4,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,2,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,8,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,16384,0.1481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,65536,0.2534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,64,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,256,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,128,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,2048,0.1281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,16,0.1067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,512,0.1100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,4096,0.1332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,8192,0.1438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,1024,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,65536,0.2806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,32768,0.1954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,2,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,4,0.1089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,16384,0.1609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,16,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,64,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,131072,0.4278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,128,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,32,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,512,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,256,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,8,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,2048,0.1478
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,4096,0.1527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,16384,0.1828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,32768,0.2318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,1024,0.1225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,65536,0.3318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,8192,0.1605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,131072,0.5215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,8,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,16,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,2,0.1140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,32,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,4,0.1137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,64,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,512,0.1275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,128,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,256,0.1206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,4096,0.1752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,8192,0.1914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,16384,0.2219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,2048,0.1671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,32768,0.2900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,65536,0.4305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,2,0.1326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,1024,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,8,0.1351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,4,0.1324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,16,0.1341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,32,0.1349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,131072,0.7019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,128,0.1426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,256,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,1024,0.1735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,64,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,2048,0.2159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,512,0.1557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,8192,0.2604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,16384,0.3170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,32768,0.4352
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,4,0.1688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,8,0.1688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,16,0.1698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,32,0.1686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,65536,0.6635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,2,0.1688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,128,0.1794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,256,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,4096,0.2312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,64,0.1702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,512,0.2042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,1024,0.2372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,4096,0.3570
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,16384,0.5181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,2048,0.3278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,2,0.2419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,65536,1.2603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,32768,0.7476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,8192,0.4074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,4,0.2425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,16,0.2415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,8,0.2410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,32,0.2410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,128,0.2608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,256,0.2768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,512,0.3081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,64,0.2418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,8192,0.6776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,4096,0.5732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,1024,0.3677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,16384,0.8919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,2048,0.5218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,2,0.0890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,4,0.0867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,32768,1.3233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,8,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,16,0.0867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,32,0.0887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,64,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,256,0.0887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,128,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,512,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,2048,0.1023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,4096,0.1039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,8192,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,32768,0.1456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,1024,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,131072,0.3012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,65536,0.2074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,2,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,4,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,16,0.0965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,16384,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,32,0.0964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,131072,1.2216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,8,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,64,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,256,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,512,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,2048,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,128,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,16384,0.1280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,1024,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,65536,0.2139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,4096,0.1148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,131072,0.3115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,8192,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,32768,0.1539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,8,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,2,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,16,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,128,0.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,4,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,512,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,64,0.0962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,2048,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,32,0.0976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,4096,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,16384,0.1281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,8192,0.1207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,1024,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,65536,0.2159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,131072,0.3301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,2,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,4,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,16,0.0978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,32,0.0985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,128,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,64,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,8,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,512,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,1024,0.1028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,2048,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,4096,0.1161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,256,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,256,0.0983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,8192,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,32768,0.1585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,65536,0.2289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,32768,0.1543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,4,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,16384,0.1308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,16,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,2,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,8,0.0999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,32,0.0980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,128,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,512,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,131072,0.3510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,2048,0.1160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,256,0.1005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,4096,0.1184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,64,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,32768,0.1772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,16384,0.1391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,1024,0.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,65536,0.2488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,2,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,8192,0.1261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,8,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,16,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,128,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,131072,0.3781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,32,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,512,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,4,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,2048,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,4096,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,256,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,16384,0.1544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,1024,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,8192,0.1353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,131072,0.4219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,65536,0.2729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,32768,0.1891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,8,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,4,0.1039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,2,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,16,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,64,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,64,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,128,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,512,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,1024,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,32,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,8192,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,4096,0.1446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,32768,0.2236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,65536,0.3245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,256,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,16384,0.1752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,4,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,2,0.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,131072,0.5153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,32,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,16,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,2048,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,8,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,256,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,1024,0.1323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,512,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,128,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,8192,0.1863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,64,0.1079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,4096,0.1688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,65536,0.4253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,16384,0.2166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,2,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,32768,0.2834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,2048,0.1611
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,4,0.1234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,8,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,64,0.1260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,32,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,256,0.1385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,1024,0.1636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,16,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,131072,0.6951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,4096,0.2228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,512,0.1486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,128,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,2048,0.2097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,16384,0.3081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,2,0.1551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,8192,0.2507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,65536,0.6555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,32768,0.4270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,16,0.1577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,8,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,4,0.1558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,32,0.1554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,64,0.1575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,1024,0.2253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,512,0.1943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,131072,1.2127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,4096,0.3433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,128,0.1682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,256,0.1770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,8192,0.3939
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,16384,0.5065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,2048,0.3157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,4,0.2198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,8,0.2199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,16,0.2199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,2,0.2199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,128,0.2402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,32768,0.7361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,512,0.2856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,65536,1.2341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,32,0.2199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,64,0.2210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,2048,0.4986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,256,0.2548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,1024,0.3440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,4096,0.5480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,32768,1.2984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,2,0.0881
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,16384,0.8681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,16,0.0890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,4,0.0868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,8192,0.6541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,8,0.0867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,32,0.0873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,64,0.0868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,128,0.0897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,2048,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,4096,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,512,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,16384,0.1189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,256,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,32768,0.1460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,1024,0.0887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,8192,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,65536,0.1997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,131072,0.2990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,4,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,32,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,128,0.0937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,16,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,2,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,64,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,256,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,1024,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,512,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,4096,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,16384,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,8192,0.1181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,32768,0.1489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,8,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,2,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,65536,0.2096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,2048,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,4,0.0961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,16,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,32,0.0956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,8,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,512,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,1024,0.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,128,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,256,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,64,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,16384,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,2048,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,131072,0.3149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,32768,0.1542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,131072,0.3250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,2,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,65536,0.2117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,4,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,4096,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,32,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,64,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,8,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,256,0.0985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,16,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,512,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,2048,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,8192,0.1159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,8192,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,1024,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,16384,0.1286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,131072,0.3462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,32768,0.1595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,2,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,4,0.0977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,65536,0.2271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,128,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,8,0.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,4096,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,32,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,128,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,64,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,256,0.0981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,2048,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,4096,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,1024,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,8192,0.1220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,16,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,512,0.0978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,131072,0.3762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,32768,0.1731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,4,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,8,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,65536,0.2456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,16384,0.1351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,64,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,128,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,256,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,512,0.1020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,16,0.0984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,32,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,4096,0.1198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,2,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,2048,0.1192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,8192,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,1024,0.1046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,16384,0.1517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,32768,0.1873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,65536,0.2708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,4,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,131072,0.4197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,2,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,32,0.1017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,128,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,8,0.1018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,16,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,2048,0.1373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,64,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,256,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,1024,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,8192,0.1534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,512,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,4096,0.1411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,131072,0.5107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,32768,0.2207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,4,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,65536,0.3225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,2,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,16384,0.1739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,32,0.1057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,64,0.1067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,128,0.1086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,512,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,1024,0.1300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,256,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,4096,0.1689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,8192,0.1838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,16,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,2048,0.1584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,32768,0.2811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,16384,0.2125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,131072,0.6935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,2,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,8,0.1213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,16,0.1205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,8,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,65536,0.4219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,128,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,4,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,64,0.1213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,256,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,1024,0.1619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,4096,0.2205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,8192,0.2469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,32768,0.4232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,512,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,32,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,2048,0.2060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,16384,0.3061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,4,0.1499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,8,0.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,65536,0.6529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,32,0.1519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,128,0.1643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,2,0.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,64,0.1526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,16,0.1522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,1024,0.2201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,2048,0.3106
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,512,0.1887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,8192,0.3912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,16384,0.5002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,256,0.1734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,2,0.2120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,32768,0.7290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,4,0.2113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,16,0.2119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,4096,0.3380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,65536,1.2288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,32,0.2110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,64,0.2127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,128,0.2321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,1024,0.3334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,256,0.2467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,131072,1.2075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,512,0.2749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,8,0.2119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,4096,0.5384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,2048,0.4874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,2,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,16,0.0846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,32768,1.2875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,8192,0.6453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,32,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,16384,0.8581
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,64,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,128,0.0855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,4,0.0862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,256,0.0866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,1024,0.0886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,2048,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,512,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,4096,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,8192,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,32768,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,131072,0.2973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,16384,0.1171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,2,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,4,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,8,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,65536,0.2015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,16,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,64,0.0934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,32,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,256,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,128,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,2048,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,512,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,4096,0.1107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,16384,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,32768,0.1507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,1024,0.0944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,8192,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,2,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,65536,0.2075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,4,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,16,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,64,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,131072,0.3143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,128,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,256,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,512,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,1024,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,2048,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,8,0.0943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,32,0.0926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,8192,0.1144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,32768,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,4096,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,65536,0.2136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,131072,0.3227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,16384,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,8,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,4,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,2,0.0940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,128,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,64,0.0960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,1024,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,512,0.0955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,32,0.0943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,2048,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,16384,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,4096,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,32768,0.1561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,65536,0.2238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,8192,0.1178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,131072,0.3461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,2,0.0939
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,256,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,16,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,8,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,4,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,32,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,64,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,128,0.0953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,512,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,256,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,4096,0.1166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,8192,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,2048,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,16384,0.1313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,65536,0.2425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,1024,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,2,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,32768,0.1707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,131072,0.3743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,4,0.0987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,64,0.0960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,32,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,8,0.0965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,256,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,512,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,1024,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,16,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,4096,0.1189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,128,0.0981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,16384,0.1504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,8192,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,131072,0.4175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,32768,0.1861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,2048,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,65536,0.2694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,8,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,16,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,16,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,32,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,4,0.0997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,64,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,2,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,256,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,1024,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,128,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,4096,0.1403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,512,0.1040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,2048,0.1356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,65536,0.3204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,4,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,131072,0.5097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,8192,0.1511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,8,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,16,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,32768,0.2189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,32,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,16384,0.1717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,2,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,512,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,64,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,1024,0.1291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,4096,0.1657
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,8192,0.1807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,128,0.1067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,2048,0.1586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,32768,0.2795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,131072,0.6924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,65536,0.4213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,16384,0.2118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,8,0.1199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,16,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,4,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,32,0.1198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,128,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,2,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,64,0.1198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,512,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,256,0.1306
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,4096,0.2173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,8192,0.2460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,2048,0.2045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,1024,0.1600
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,16384,0.3045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,256,0.1081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,2,0.1492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,8,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,4,0.1484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,65536,0.6526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,32768,0.4242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,64,0.1536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,32,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,16,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,256,0.1723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,128,0.1605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,1024,0.2182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,4096,0.3355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,2048,0.3086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,512,0.1888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,8192,0.3874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,2,0.2076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,65536,1.2415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,32768,0.7270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,16384,0.4999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,16,0.2076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,8,0.2079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,4,0.2089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,32,0.2087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,64,0.2082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,128,0.2277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,1024,0.3297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,512,0.2715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,2048,0.4838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,256,0.2413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,16384,0.8515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,4096,0.5344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,8192,0.6380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,2,0.0847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,32768,1.2846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,4,0.0847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,8,0.0868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,32,0.0823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,64,0.0829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,128,0.0850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,256,0.0867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,1024,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,512,0.0890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,8192,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,4096,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,2048,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,16384,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,65536,0.2015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,32768,0.1434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,2,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,4,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,8,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,16,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,131072,0.2991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,32,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,256,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,128,0.0921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,64,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,1024,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,2048,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,512,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,16384,0.1212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,65536,0.2075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,131072,0.3138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,2,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,4,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,4096,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,8,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,32768,0.1467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,16,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,64,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,128,0.0937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,32,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,512,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,1024,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,2048,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,4096,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,8192,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,16384,0.1234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,32768,0.1520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,256,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,65536,0.2091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,131072,0.3201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,4,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,2,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,16,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,32,0.0926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,8,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,64,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,1024,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,2048,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,512,0.0958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,4096,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,128,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,16384,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,8192,0.1162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,65536,0.2205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,131072,0.3466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,32768,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,2,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,8,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,4,0.0953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,32,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,64,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,16,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,256,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,512,0.0956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,128,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,2048,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,8192,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,1024,0.1004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,4096,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,8192,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,32768,0.1672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,65536,0.2419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,2,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,16384,0.1324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,4,0.0979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,32,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,16,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,128,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,256,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,64,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,8,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,512,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,1024,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,4096,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,2048,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,16384,0.1486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,65536,0.2681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,32768,0.1845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,8192,0.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,131072,0.4180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,2,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,8,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,4,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,16,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,32,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,64,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,256,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,512,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,1024,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,2048,0.1352
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,128,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,4096,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,8192,0.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,32768,0.2160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,65536,0.3199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,256,0.0954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,131072,0.5095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,16384,0.1708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,4,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,2,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,16,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,8,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,128,0.1060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,256,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,64,0.1041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,2048,0.1564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,512,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,8192,0.1805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,1024,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,32,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,16384,0.2103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,32768,0.2779
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,131072,0.6924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,65536,0.4210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,4096,0.1645
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,8,0.1198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,2,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,16,0.1183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,32,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,128,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,512,0.1435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,1024,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,4,0.1179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,4096,0.2171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,2048,0.2032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,32768,0.4208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,8192,0.2449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,16384,0.3027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,2,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,131072,1.2076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,8,0.1496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,256,0.1293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,65536,0.6500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,4,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,32,0.1460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,16,0.1472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,128,0.1603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,64,0.1525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,256,0.1698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,512,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,4096,0.3330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,2048,0.3060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,1024,0.2179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,16384,0.4974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,8192,0.3862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,4,0.2057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,65536,1.2465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,8,0.2056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,32768,0.7257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,16,0.2064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,2,0.2070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,64,0.2063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,32,0.2087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,128,0.2255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,256,0.2404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,2048,0.4823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,4096,0.5319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,512,0.2694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,1024,0.3281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,16384,0.8491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,8192,0.6358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,32768,1.2803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,64,0.1185
